maint: add doc/coverage to .gitignore
[coreutils.git] / src / factor.c
blob97af6caae9a39ba32eb682da59c454164558f507
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organisation:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Using GMP is optional. Define HAVE_GMP to make this code include GMP
39 factoring code. The GMP factoring code is based on GMP's demos/factorize.c
40 (last synced 2012-09-07). The GMP-based factoring code will stay in GMP
41 factoring code even if numbers get small enough for using the two-word
42 code.
44 Algorithm:
46 (1) Perform trial division using a small primes table, but without hardware
47 division since the primes table store inverses modulo the word base.
48 (The GMP variant of this code doesn't make use of the precomputed
49 inverses, but instead relies on GMP for fast divisibility testing.)
50 (2) Check the nature of any non-factored part using Miller-Rabin for
51 detecting composites, and Lucas for detecting primes.
52 (3) Factor any remaining composite part using the Pollard-Brent rho
53 algorithm or if USE_SQUFOF is defined to 1, try that first.
54 Status of found factors are checked again using Miller-Rabin and Lucas.
56 We prefer using Hensel norm in the divisions, not the more familiar
57 Euclidian norm, since the former leads to much faster code. In the
58 Pollard-Brent rho code and the prime testing code, we use Montgomery's
59 trick of multiplying all n-residues by the word base, allowing cheap Hensel
60 reductions mod n.
62 Improvements:
64 * Use modular inverses also for exact division in the Lucas code, and
65 elsewhere. A problem is to locate the inverses not from an index, but
66 from a prime. We might instead compute the inverse on-the-fly.
68 * Tune trial division table size (not forgetting that this is a standalone
69 program where the table will be read from disk for each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidian)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methoods. These functions currently dominate run time when using
84 the -w option.
87 /* Whether to recursively factor to prove primality,
88 or run faster probabilistic tests. */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
93 /* Faster for certain ranges but less general. */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
98 /* Output SQUFOF statistics. */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdio.h>
107 #if HAVE_GMP
108 # include <gmp.h>
109 # if !HAVE_DECL_MPZ_INITS
110 # include <stdarg.h>
111 # endif
112 #endif
114 #include <assert.h>
116 #include "system.h"
117 #include "die.h"
118 #include "error.h"
119 #include "full-write.h"
120 #include "quote.h"
121 #include "readtokens.h"
122 #include "xstrtol.h"
124 /* The official name of this program (e.g., no 'g' prefix). */
125 #define PROGRAM_NAME "factor"
127 #define AUTHORS \
128 proper_name ("Paul Rubin"), \
129 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
130 proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
132 /* Token delimiters when reading from a file. */
133 #define DELIM "\n\t "
135 #ifndef USE_LONGLONG_H
136 /* With the way we use longlong.h, it's only safe to use
137 when UWtype = UHWtype, as there were various cases
138 (as can be seen in the history for longlong.h) where
139 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
140 to avoid compile time or run time issues. */
141 # if LONG_MAX == INTMAX_MAX
142 # define USE_LONGLONG_H 1
143 # endif
144 #endif
146 #if USE_LONGLONG_H
148 /* Make definitions for longlong.h to make it do what it can do for us */
150 /* bitcount for uintmax_t */
151 # if UINTMAX_MAX == UINT32_MAX
152 # define W_TYPE_SIZE 32
153 # elif UINTMAX_MAX == UINT64_MAX
154 # define W_TYPE_SIZE 64
155 # elif UINTMAX_MAX == UINT128_MAX
156 # define W_TYPE_SIZE 128
157 # endif
159 # define UWtype uintmax_t
160 # define UHWtype unsigned long int
161 # undef UDWtype
162 # if HAVE_ATTRIBUTE_MODE
163 typedef unsigned int UQItype __attribute__ ((mode (QI)));
164 typedef int SItype __attribute__ ((mode (SI)));
165 typedef unsigned int USItype __attribute__ ((mode (SI)));
166 typedef int DItype __attribute__ ((mode (DI)));
167 typedef unsigned int UDItype __attribute__ ((mode (DI)));
168 # else
169 typedef unsigned char UQItype;
170 typedef long SItype;
171 typedef unsigned long int USItype;
172 # if HAVE_LONG_LONG_INT
173 typedef long long int DItype;
174 typedef unsigned long long int UDItype;
175 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
176 typedef long int DItype;
177 typedef unsigned long int UDItype;
178 # endif
179 # endif
180 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
181 # define ASSERT(x) /* FIXME make longlong.h really standalone */
182 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
183 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
184 # ifndef __GMP_GNUC_PREREQ
185 # define __GMP_GNUC_PREREQ(a,b) 1
186 # endif
188 /* These stub macros are only used in longlong.h in certain system compiler
189 combinations, so ensure usage to avoid -Wunused-macros warnings. */
190 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
191 ASSERT (1)
192 __GMP_DECLSPEC
193 # endif
195 # if _ARCH_PPC
196 # define HAVE_HOST_CPU_FAMILY_powerpc 1
197 # endif
198 # include "longlong.h"
199 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
200 const unsigned char factor_clz_tab[129] =
202 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
203 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
204 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
205 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
208 # endif
210 #else /* not USE_LONGLONG_H */
212 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
213 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
214 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
215 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
217 #endif
219 #if !defined __clz_tab && !defined UHWtype
220 /* Without this seemingly useless conditional, gcc -Wunused-macros
221 warns that each of the two tested macros is unused on Fedora 18.
222 FIXME: this is just an ugly band-aid. Fix it properly. */
223 #endif
225 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
226 #define MAX_NFACTS 26
228 enum
230 DEV_DEBUG_OPTION = CHAR_MAX + 1
233 static struct option const long_options[] =
235 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
236 {GETOPT_HELP_OPTION_DECL},
237 {GETOPT_VERSION_OPTION_DECL},
238 {NULL, 0, NULL, 0}
241 struct factors
243 uintmax_t plarge[2]; /* Can have a single large factor */
244 uintmax_t p[MAX_NFACTS];
245 unsigned char e[MAX_NFACTS];
246 unsigned char nfactors;
249 #if HAVE_GMP
250 struct mp_factors
252 mpz_t *p;
253 unsigned long int *e;
254 unsigned long int nfactors;
256 #endif
258 static void factor (uintmax_t, uintmax_t, struct factors *);
260 #ifndef umul_ppmm
261 # define umul_ppmm(w1, w0, u, v) \
262 do { \
263 uintmax_t __x0, __x1, __x2, __x3; \
264 unsigned long int __ul, __vl, __uh, __vh; \
265 uintmax_t __u = (u), __v = (v); \
267 __ul = __ll_lowpart (__u); \
268 __uh = __ll_highpart (__u); \
269 __vl = __ll_lowpart (__v); \
270 __vh = __ll_highpart (__v); \
272 __x0 = (uintmax_t) __ul * __vl; \
273 __x1 = (uintmax_t) __ul * __vh; \
274 __x2 = (uintmax_t) __uh * __vl; \
275 __x3 = (uintmax_t) __uh * __vh; \
277 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
278 __x1 += __x2; /* but this indeed can */ \
279 if (__x1 < __x2) /* did we get it? */ \
280 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
282 (w1) = __x3 + __ll_highpart (__x1); \
283 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
284 } while (0)
285 #endif
287 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
288 /* Define our own, not needing normalization. This function is
289 currently not performance critical, so keep it simple. Similar to
290 the mod macro below. */
291 # undef udiv_qrnnd
292 # define udiv_qrnnd(q, r, n1, n0, d) \
293 do { \
294 uintmax_t __d1, __d0, __q, __r1, __r0; \
296 assert ((n1) < (d)); \
297 __d1 = (d); __d0 = 0; \
298 __r1 = (n1); __r0 = (n0); \
299 __q = 0; \
300 for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--) \
302 rsh2 (__d1, __d0, __d1, __d0, 1); \
303 __q <<= 1; \
304 if (ge2 (__r1, __r0, __d1, __d0)) \
306 __q++; \
307 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
310 (r) = __r0; \
311 (q) = __q; \
312 } while (0)
313 #endif
315 #if !defined add_ssaaaa
316 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
317 do { \
318 uintmax_t _add_x; \
319 _add_x = (al) + (bl); \
320 (sh) = (ah) + (bh) + (_add_x < (al)); \
321 (sl) = _add_x; \
322 } while (0)
323 #endif
325 #define rsh2(rh, rl, ah, al, cnt) \
326 do { \
327 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
328 (rh) = (ah) >> (cnt); \
329 } while (0)
331 #define lsh2(rh, rl, ah, al, cnt) \
332 do { \
333 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
334 (rl) = (al) << (cnt); \
335 } while (0)
337 #define ge2(ah, al, bh, bl) \
338 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
340 #define gt2(ah, al, bh, bl) \
341 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
343 #ifndef sub_ddmmss
344 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
345 do { \
346 uintmax_t _cy; \
347 _cy = (al) < (bl); \
348 (rl) = (al) - (bl); \
349 (rh) = (ah) - (bh) - _cy; \
350 } while (0)
351 #endif
353 #ifndef count_leading_zeros
354 # define count_leading_zeros(count, x) do { \
355 uintmax_t __clz_x = (x); \
356 unsigned int __clz_c; \
357 for (__clz_c = 0; \
358 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
359 __clz_c += 8) \
360 __clz_x <<= 8; \
361 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
362 __clz_x <<= 1; \
363 (count) = __clz_c; \
364 } while (0)
365 #endif
367 #ifndef count_trailing_zeros
368 # define count_trailing_zeros(count, x) do { \
369 uintmax_t __ctz_x = (x); \
370 unsigned int __ctz_c = 0; \
371 while ((__ctz_x & 1) == 0) \
373 __ctz_x >>= 1; \
374 __ctz_c++; \
376 (count) = __ctz_c; \
377 } while (0)
378 #endif
380 /* Requires that a < n and b <= n */
381 #define submod(r,a,b,n) \
382 do { \
383 uintmax_t _t = - (uintmax_t) (a < b); \
384 (r) = ((n) & _t) + (a) - (b); \
385 } while (0)
387 #define addmod(r,a,b,n) \
388 submod ((r), (a), ((n) - (b)), (n))
390 /* Modular two-word addition and subtraction. For performance reasons, the
391 most significant bit of n1 must be clear. The destination variables must be
392 distinct from the mod operand. */
393 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
394 do { \
395 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
396 if (ge2 ((r1), (r0), (n1), (n0))) \
397 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
398 } while (0)
399 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
400 do { \
401 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
402 if ((intmax_t) (r1) < 0) \
403 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
404 } while (0)
406 #define HIGHBIT_TO_MASK(x) \
407 (((intmax_t)-1 >> 1) < 0 \
408 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
409 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
410 ? UINTMAX_MAX : (uintmax_t) 0))
412 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
413 Requires that d1 != 0. */
414 static uintmax_t
415 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
417 int cntd, cnta;
419 assert (d1 != 0);
421 if (a1 == 0)
423 *r1 = 0;
424 return a0;
427 count_leading_zeros (cntd, d1);
428 count_leading_zeros (cnta, a1);
429 int cnt = cntd - cnta;
430 lsh2 (d1, d0, d1, d0, cnt);
431 for (int i = 0; i < cnt; i++)
433 if (ge2 (a1, a0, d1, d0))
434 sub_ddmmss (a1, a0, a1, a0, d1, d0);
435 rsh2 (d1, d0, d1, d0, 1);
438 *r1 = a1;
439 return a0;
442 static uintmax_t _GL_ATTRIBUTE_CONST
443 gcd_odd (uintmax_t a, uintmax_t b)
445 if ( (b & 1) == 0)
447 uintmax_t t = b;
448 b = a;
449 a = t;
451 if (a == 0)
452 return b;
454 /* Take out least significant one bit, to make room for sign */
455 b >>= 1;
457 for (;;)
459 uintmax_t t;
460 uintmax_t bgta;
462 while ((a & 1) == 0)
463 a >>= 1;
464 a >>= 1;
466 t = a - b;
467 if (t == 0)
468 return (a << 1) + 1;
470 bgta = HIGHBIT_TO_MASK (t);
472 /* b <-- min (a, b) */
473 b += (bgta & t);
475 /* a <-- |a - b| */
476 a = (t ^ bgta) - bgta;
480 static uintmax_t
481 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
483 assert (b0 & 1);
485 if ( (a0 | a1) == 0)
487 *r1 = b1;
488 return b0;
491 while ((a0 & 1) == 0)
492 rsh2 (a1, a0, a1, a0, 1);
494 for (;;)
496 if ((b1 | a1) == 0)
498 *r1 = 0;
499 return gcd_odd (b0, a0);
502 if (gt2 (a1, a0, b1, b0))
504 sub_ddmmss (a1, a0, a1, a0, b1, b0);
506 rsh2 (a1, a0, a1, a0, 1);
507 while ((a0 & 1) == 0);
509 else if (gt2 (b1, b0, a1, a0))
511 sub_ddmmss (b1, b0, b1, b0, a1, a0);
513 rsh2 (b1, b0, b1, b0, 1);
514 while ((b0 & 1) == 0);
516 else
517 break;
520 *r1 = a1;
521 return a0;
524 static void
525 factor_insert_multiplicity (struct factors *factors,
526 uintmax_t prime, unsigned int m)
528 unsigned int nfactors = factors->nfactors;
529 uintmax_t *p = factors->p;
530 unsigned char *e = factors->e;
532 /* Locate position for insert new or increment e. */
533 int i;
534 for (i = nfactors - 1; i >= 0; i--)
536 if (p[i] <= prime)
537 break;
540 if (i < 0 || p[i] != prime)
542 for (int j = nfactors - 1; j > i; j--)
544 p[j + 1] = p[j];
545 e[j + 1] = e[j];
547 p[i + 1] = prime;
548 e[i + 1] = m;
549 factors->nfactors = nfactors + 1;
551 else
553 e[i] += m;
557 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
559 static void
560 factor_insert_large (struct factors *factors,
561 uintmax_t p1, uintmax_t p0)
563 if (p1 > 0)
565 assert (factors->plarge[1] == 0);
566 factors->plarge[0] = p0;
567 factors->plarge[1] = p1;
569 else
570 factor_insert (factors, p0);
573 #if HAVE_GMP
575 # if !HAVE_DECL_MPZ_INITS
577 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
578 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
580 static void
581 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
583 va_list ap;
585 va_start (ap, mpz_single_init);
587 mpz_t *mpz;
588 while ((mpz = va_arg (ap, mpz_t *)))
589 mpz_single_init (*mpz);
591 va_end (ap);
593 # endif
595 static void mp_factor (mpz_t, struct mp_factors *);
597 static void
598 mp_factor_init (struct mp_factors *factors)
600 factors->p = NULL;
601 factors->e = NULL;
602 factors->nfactors = 0;
605 static void
606 mp_factor_clear (struct mp_factors *factors)
608 for (unsigned int i = 0; i < factors->nfactors; i++)
609 mpz_clear (factors->p[i]);
611 free (factors->p);
612 free (factors->e);
615 static void
616 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
618 unsigned long int nfactors = factors->nfactors;
619 mpz_t *p = factors->p;
620 unsigned long int *e = factors->e;
621 long i;
623 /* Locate position for insert new or increment e. */
624 for (i = nfactors - 1; i >= 0; i--)
626 if (mpz_cmp (p[i], prime) <= 0)
627 break;
630 if (i < 0 || mpz_cmp (p[i], prime) != 0)
632 p = xrealloc (p, (nfactors + 1) * sizeof p[0]);
633 e = xrealloc (e, (nfactors + 1) * sizeof e[0]);
635 mpz_init (p[nfactors]);
636 for (long j = nfactors - 1; j > i; j--)
638 mpz_set (p[j + 1], p[j]);
639 e[j + 1] = e[j];
641 mpz_set (p[i + 1], prime);
642 e[i + 1] = 1;
644 factors->p = p;
645 factors->e = e;
646 factors->nfactors = nfactors + 1;
648 else
650 e[i] += 1;
654 static void
655 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
657 mpz_t pz;
659 mpz_init_set_ui (pz, prime);
660 mp_factor_insert (factors, pz);
661 mpz_clear (pz);
663 #endif /* HAVE_GMP */
666 /* Number of bits in an uintmax_t. */
667 enum { W = sizeof (uintmax_t) * CHAR_BIT };
669 /* Verify that uintmax_t does not have holes in its representation. */
670 verify (UINTMAX_MAX >> (W - 1) == 1);
672 #define P(a,b,c,d) a,
673 static const unsigned char primes_diff[] = {
674 #include "primes.h"
675 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
677 #undef P
679 #define PRIMES_PTAB_ENTRIES \
680 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
682 #define P(a,b,c,d) b,
683 static const unsigned char primes_diff8[] = {
684 #include "primes.h"
685 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
687 #undef P
689 struct primes_dtab
691 uintmax_t binv, lim;
694 #define P(a,b,c,d) {c,d},
695 static const struct primes_dtab primes_dtab[] = {
696 #include "primes.h"
697 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
699 #undef P
701 /* Verify that uintmax_t is not wider than
702 the integers used to generate primes.h. */
703 verify (W <= WIDE_UINT_BITS);
705 /* debugging for developers. Enables devmsg().
706 This flag is used only in the GMP code. */
707 static bool dev_debug = false;
709 /* Prove primality or run probabilistic tests. */
710 static bool flag_prove_primality = PROVE_PRIMALITY;
712 /* Number of Miller-Rabin tests to run when not proving primality. */
713 #define MR_REPS 25
715 static void
716 factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
717 unsigned int off)
719 for (unsigned int j = 0; j < off; j++)
720 p += primes_diff[i + j];
721 factor_insert (factors, p);
724 /* Trial division with odd primes uses the following trick.
726 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
727 consider the case t < B (this is the second loop below).
729 From our tables we get
731 binv = p^{-1} (mod B)
732 lim = floor ( (B-1) / p ).
734 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
735 (and all quotients in this range occur for some t).
737 Then t = q * p is true also (mod B), and p is invertible we get
739 q = t * binv (mod B).
741 Next, assume that t is *not* divisible by p. Since multiplication
742 by binv (mod B) is a one-to-one mapping,
744 t * binv (mod B) > lim,
746 because all the smaller values are already taken.
748 This can be summed up by saying that the function
750 q(t) = binv * t (mod B)
752 is a permutation of the range 0 <= t < B, with the curious property
753 that it maps the multiples of p onto the range 0 <= q <= lim, in
754 order, and the non-multiples of p onto the range lim < q < B.
757 static uintmax_t
758 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
759 struct factors *factors)
761 if (t0 % 2 == 0)
763 unsigned int cnt;
765 if (t0 == 0)
767 count_trailing_zeros (cnt, t1);
768 t0 = t1 >> cnt;
769 t1 = 0;
770 cnt += W_TYPE_SIZE;
772 else
774 count_trailing_zeros (cnt, t0);
775 rsh2 (t1, t0, t1, t0, cnt);
778 factor_insert_multiplicity (factors, 2, cnt);
781 uintmax_t p = 3;
782 unsigned int i;
783 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
785 for (;;)
787 uintmax_t q1, q0, hi, lo _GL_UNUSED;
789 q0 = t0 * primes_dtab[i].binv;
790 umul_ppmm (hi, lo, q0, p);
791 if (hi > t1)
792 break;
793 hi = t1 - hi;
794 q1 = hi * primes_dtab[i].binv;
795 if (LIKELY (q1 > primes_dtab[i].lim))
796 break;
797 t1 = q1; t0 = q0;
798 factor_insert (factors, p);
800 p += primes_diff[i + 1];
802 if (t1p)
803 *t1p = t1;
805 #define DIVBLOCK(I) \
806 do { \
807 for (;;) \
809 q = t0 * pd[I].binv; \
810 if (LIKELY (q > pd[I].lim)) \
811 break; \
812 t0 = q; \
813 factor_insert_refind (factors, p, i + 1, I); \
815 } while (0)
817 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
819 uintmax_t q;
820 const struct primes_dtab *pd = &primes_dtab[i];
821 DIVBLOCK (0);
822 DIVBLOCK (1);
823 DIVBLOCK (2);
824 DIVBLOCK (3);
825 DIVBLOCK (4);
826 DIVBLOCK (5);
827 DIVBLOCK (6);
828 DIVBLOCK (7);
830 p += primes_diff8[i];
831 if (p * p > t0)
832 break;
835 return t0;
838 #if HAVE_GMP
839 static void
840 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
842 mpz_t q;
843 unsigned long int p;
845 devmsg ("[trial division] ");
847 mpz_init (q);
849 p = mpz_scan1 (t, 0);
850 mpz_div_2exp (t, t, p);
851 while (p)
853 mp_factor_insert_ui (factors, 2);
854 --p;
857 p = 3;
858 for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;)
860 if (! mpz_divisible_ui_p (t, p))
862 p += primes_diff[i++];
863 if (mpz_cmp_ui (t, p * p) < 0)
864 break;
866 else
868 mpz_tdiv_q_ui (t, t, p);
869 mp_factor_insert_ui (factors, p);
873 mpz_clear (q);
875 #endif
877 /* Entry i contains (2i+1)^(-1) mod 2^8. */
878 static const unsigned char binvert_table[128] =
880 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
881 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
882 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
883 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
884 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
885 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
886 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
887 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
888 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
889 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
890 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
891 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
892 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
893 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
894 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
895 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
898 /* Compute n^(-1) mod B, using a Newton iteration. */
899 #define binv(inv,n) \
900 do { \
901 uintmax_t __n = (n); \
902 uintmax_t __inv; \
904 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
905 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
906 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
907 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
909 if (W_TYPE_SIZE > 64) \
911 int __invbits = 64; \
912 do { \
913 __inv = 2 * __inv - __inv * __inv * __n; \
914 __invbits *= 2; \
915 } while (__invbits < W_TYPE_SIZE); \
918 (inv) = __inv; \
919 } while (0)
921 /* q = u / d, assuming d|u. */
922 #define divexact_21(q1, q0, u1, u0, d) \
923 do { \
924 uintmax_t _di, _q0; \
925 binv (_di, (d)); \
926 _q0 = (u0) * _di; \
927 if ((u1) >= (d)) \
929 uintmax_t _p1, _p0 _GL_UNUSED; \
930 umul_ppmm (_p1, _p0, _q0, d); \
931 (q1) = ((u1) - _p1) * _di; \
932 (q0) = _q0; \
934 else \
936 (q0) = _q0; \
937 (q1) = 0; \
939 } while (0)
941 /* x B (mod n). */
942 #define redcify(r_prim, r, n) \
943 do { \
944 uintmax_t _redcify_q _GL_UNUSED; \
945 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
946 } while (0)
948 /* x B^2 (mod n). Requires x > 0, n1 < B/2 */
949 #define redcify2(r1, r0, x, n1, n0) \
950 do { \
951 uintmax_t _r1, _r0, _i; \
952 if ((x) < (n1)) \
954 _r1 = (x); _r0 = 0; \
955 _i = W_TYPE_SIZE; \
957 else \
959 _r1 = 0; _r0 = (x); \
960 _i = 2*W_TYPE_SIZE; \
962 while (_i-- > 0) \
964 lsh2 (_r1, _r0, _r1, _r0, 1); \
965 if (ge2 (_r1, _r0, (n1), (n0))) \
966 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
968 (r1) = _r1; \
969 (r0) = _r0; \
970 } while (0)
972 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
973 Both a and b must be in redc form, the result will be in redc form too. */
974 static inline uintmax_t
975 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
977 uintmax_t rh, rl, q, th, tl _GL_UNUSED, xh;
979 umul_ppmm (rh, rl, a, b);
980 q = rl * mi;
981 umul_ppmm (th, tl, q, m);
982 xh = rh - th;
983 if (rh < th)
984 xh += m;
986 return xh;
989 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
990 Both a and b must be in redc form, the result will be in redc form too.
991 For performance reasons, the most significant bit of m must be clear. */
992 static uintmax_t
993 mulredc2 (uintmax_t *r1p,
994 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
995 uintmax_t m1, uintmax_t m0, uintmax_t mi)
997 uintmax_t r1, r0, q, p1, p0 _GL_UNUSED, t1, t0, s1, s0;
998 mi = -mi;
999 assert ( (a1 >> (W_TYPE_SIZE - 1)) == 0);
1000 assert ( (b1 >> (W_TYPE_SIZE - 1)) == 0);
1001 assert ( (m1 >> (W_TYPE_SIZE - 1)) == 0);
1003 /* First compute a0 * <b1, b0> B^{-1}
1004 +-----+
1005 |a0 b0|
1006 +--+--+--+
1007 |a0 b1|
1008 +--+--+--+
1009 |q0 m0|
1010 +--+--+--+
1011 |q0 m1|
1012 -+--+--+--+
1013 |r1|r0| 0|
1014 +--+--+--+
1016 umul_ppmm (t1, t0, a0, b0);
1017 umul_ppmm (r1, r0, a0, b1);
1018 q = mi * t0;
1019 umul_ppmm (p1, p0, q, m0);
1020 umul_ppmm (s1, s0, q, m1);
1021 r0 += (t0 != 0); /* Carry */
1022 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1023 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1024 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1026 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1027 +-----+
1028 |a1 b0|
1029 +--+--+
1030 |r1|r0|
1031 +--+--+--+
1032 |a1 b1|
1033 +--+--+--+
1034 |q1 m0|
1035 +--+--+--+
1036 |q1 m1|
1037 -+--+--+--+
1038 |r1|r0| 0|
1039 +--+--+--+
1041 umul_ppmm (t1, t0, a1, b0);
1042 umul_ppmm (s1, s0, a1, b1);
1043 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1044 q = mi * t0;
1045 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1046 umul_ppmm (p1, p0, q, m0);
1047 umul_ppmm (s1, s0, q, m1);
1048 r0 += (t0 != 0); /* Carry */
1049 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1050 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1051 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1053 if (ge2 (r1, r0, m1, m0))
1054 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1056 *r1p = r1;
1057 return r0;
1060 static uintmax_t _GL_ATTRIBUTE_CONST
1061 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1063 uintmax_t y = one;
1065 if (e & 1)
1066 y = b;
1068 while (e != 0)
1070 b = mulredc (b, b, n, ni);
1071 e >>= 1;
1073 if (e & 1)
1074 y = mulredc (y, b, n, ni);
1077 return y;
1080 static uintmax_t
1081 powm2 (uintmax_t *r1m,
1082 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1083 uintmax_t ni, const uintmax_t *one)
1085 uintmax_t r1, r0, b1, b0, n1, n0;
1086 unsigned int i;
1087 uintmax_t e;
1089 b0 = bp[0];
1090 b1 = bp[1];
1091 n0 = np[0];
1092 n1 = np[1];
1094 r0 = one[0];
1095 r1 = one[1];
1097 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1099 if (e & 1)
1101 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1102 r1 = *r1m;
1104 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1105 b1 = *r1m;
1107 for (e = ep[1]; e > 0; e >>= 1)
1109 if (e & 1)
1111 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1112 r1 = *r1m;
1114 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1115 b1 = *r1m;
1117 *r1m = r1;
1118 return r0;
1121 static bool _GL_ATTRIBUTE_CONST
1122 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1123 unsigned int k, uintmax_t one)
1125 uintmax_t y = powm (b, q, n, ni, one);
1127 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1129 if (y == one || y == nm1)
1130 return true;
1132 for (unsigned int i = 1; i < k; i++)
1134 y = mulredc (y, y, n, ni);
1136 if (y == nm1)
1137 return true;
1138 if (y == one)
1139 return false;
1141 return false;
1144 static bool
1145 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1146 const uintmax_t *qp, unsigned int k, const uintmax_t *one)
1148 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1150 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1151 y1 = r1m;
1153 if (y0 == one[0] && y1 == one[1])
1154 return true;
1156 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1158 if (y0 == nm1_0 && y1 == nm1_1)
1159 return true;
1161 for (unsigned int i = 1; i < k; i++)
1163 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1164 y1 = r1m;
1166 if (y0 == nm1_0 && y1 == nm1_1)
1167 return true;
1168 if (y0 == one[0] && y1 == one[1])
1169 return false;
1171 return false;
1174 #if HAVE_GMP
1175 static bool
1176 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1177 mpz_srcptr q, unsigned long int k)
1179 mpz_powm (y, x, q, n);
1181 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1182 return true;
1184 for (unsigned long int i = 1; i < k; i++)
1186 mpz_powm_ui (y, y, 2, n);
1187 if (mpz_cmp (y, nm1) == 0)
1188 return true;
1189 if (mpz_cmp_ui (y, 1) == 0)
1190 return false;
1192 return false;
1194 #endif
1196 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1197 have been observed. The average seem to be about 2. */
1198 static bool
1199 prime_p (uintmax_t n)
1201 int k;
1202 bool is_prime;
1203 uintmax_t a_prim, one, ni;
1204 struct factors factors;
1206 if (n <= 1)
1207 return false;
1209 /* We have already casted out small primes. */
1210 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1211 return true;
1213 /* Precomputation for Miller-Rabin. */
1214 uintmax_t q = n - 1;
1215 for (k = 0; (q & 1) == 0; k++)
1216 q >>= 1;
1218 uintmax_t a = 2;
1219 binv (ni, n); /* ni <- 1/n mod B */
1220 redcify (one, 1, n);
1221 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1223 /* Perform a Miller-Rabin test, finds most composites quickly. */
1224 if (!millerrabin (n, ni, a_prim, q, k, one))
1225 return false;
1227 if (flag_prove_primality)
1229 /* Factor n-1 for Lucas. */
1230 factor (0, n - 1, &factors);
1233 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1234 number composite. */
1235 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1237 if (flag_prove_primality)
1239 is_prime = true;
1240 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1242 is_prime
1243 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1246 else
1248 /* After enough Miller-Rabin runs, be content. */
1249 is_prime = (r == MR_REPS - 1);
1252 if (is_prime)
1253 return true;
1255 a += primes_diff[r]; /* Establish new base. */
1257 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1258 on most processors, since it avoids udiv_qrnnd. If we go down the
1259 udiv_qrnnd_preinv path, this code should be replaced. */
1261 uintmax_t s1, s0;
1262 umul_ppmm (s1, s0, one, a);
1263 if (LIKELY (s1 == 0))
1264 a_prim = s0 % n;
1265 else
1267 uintmax_t dummy _GL_UNUSED;
1268 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1272 if (!millerrabin (n, ni, a_prim, q, k, one))
1273 return false;
1276 error (0, 0, _("Lucas prime test failure. This should not happen"));
1277 abort ();
1280 static bool
1281 prime2_p (uintmax_t n1, uintmax_t n0)
1283 uintmax_t q[2], nm1[2];
1284 uintmax_t a_prim[2];
1285 uintmax_t one[2];
1286 uintmax_t na[2];
1287 uintmax_t ni;
1288 unsigned int k;
1289 struct factors factors;
1291 if (n1 == 0)
1292 return prime_p (n0);
1294 nm1[1] = n1 - (n0 == 0);
1295 nm1[0] = n0 - 1;
1296 if (nm1[0] == 0)
1298 count_trailing_zeros (k, nm1[1]);
1300 q[0] = nm1[1] >> k;
1301 q[1] = 0;
1302 k += W_TYPE_SIZE;
1304 else
1306 count_trailing_zeros (k, nm1[0]);
1307 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1310 uintmax_t a = 2;
1311 binv (ni, n0);
1312 redcify2 (one[1], one[0], 1, n1, n0);
1313 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1315 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1316 na[0] = n0;
1317 na[1] = n1;
1319 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1320 return false;
1322 if (flag_prove_primality)
1324 /* Factor n-1 for Lucas. */
1325 factor (nm1[1], nm1[0], &factors);
1328 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1329 number composite. */
1330 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1332 bool is_prime;
1333 uintmax_t e[2], y[2];
1335 if (flag_prove_primality)
1337 is_prime = true;
1338 if (factors.plarge[1])
1340 uintmax_t pi;
1341 binv (pi, factors.plarge[0]);
1342 e[0] = pi * nm1[0];
1343 e[1] = 0;
1344 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1345 is_prime = (y[0] != one[0] || y[1] != one[1]);
1347 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1349 /* FIXME: We always have the factor 2. Do we really need to
1350 handle it here? We have done the same powering as part
1351 of millerrabin. */
1352 if (factors.p[i] == 2)
1353 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1354 else
1355 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1356 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1357 is_prime = (y[0] != one[0] || y[1] != one[1]);
1360 else
1362 /* After enough Miller-Rabin runs, be content. */
1363 is_prime = (r == MR_REPS - 1);
1366 if (is_prime)
1367 return true;
1369 a += primes_diff[r]; /* Establish new base. */
1370 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1372 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1373 return false;
1376 error (0, 0, _("Lucas prime test failure. This should not happen"));
1377 abort ();
1380 #if HAVE_GMP
1381 static bool
1382 mp_prime_p (mpz_t n)
1384 bool is_prime;
1385 mpz_t q, a, nm1, tmp;
1386 struct mp_factors factors;
1388 if (mpz_cmp_ui (n, 1) <= 0)
1389 return false;
1391 /* We have already casted out small primes. */
1392 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1393 return true;
1395 mpz_inits (q, a, nm1, tmp, NULL);
1397 /* Precomputation for Miller-Rabin. */
1398 mpz_sub_ui (nm1, n, 1);
1400 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1401 unsigned long int k = mpz_scan1 (nm1, 0);
1402 mpz_tdiv_q_2exp (q, nm1, k);
1404 mpz_set_ui (a, 2);
1406 /* Perform a Miller-Rabin test, finds most composites quickly. */
1407 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1409 is_prime = false;
1410 goto ret2;
1413 if (flag_prove_primality)
1415 /* Factor n-1 for Lucas. */
1416 mpz_set (tmp, nm1);
1417 mp_factor (tmp, &factors);
1420 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1421 number composite. */
1422 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1424 if (flag_prove_primality)
1426 is_prime = true;
1427 for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++)
1429 mpz_divexact (tmp, nm1, factors.p[i]);
1430 mpz_powm (tmp, a, tmp, n);
1431 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1434 else
1436 /* After enough Miller-Rabin runs, be content. */
1437 is_prime = (r == MR_REPS - 1);
1440 if (is_prime)
1441 goto ret1;
1443 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1445 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1447 is_prime = false;
1448 goto ret1;
1452 error (0, 0, _("Lucas prime test failure. This should not happen"));
1453 abort ();
1455 ret1:
1456 if (flag_prove_primality)
1457 mp_factor_clear (&factors);
1458 ret2:
1459 mpz_clears (q, a, nm1, tmp, NULL);
1461 return is_prime;
1463 #endif
1465 static void
1466 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1467 struct factors *factors)
1469 uintmax_t x, z, y, P, t, ni, g;
1471 unsigned long int k = 1;
1472 unsigned long int l = 1;
1474 redcify (P, 1, n);
1475 addmod (x, P, P, n); /* i.e., redcify(2) */
1476 y = z = x;
1478 while (n != 1)
1480 assert (a < n);
1482 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1484 for (;;)
1488 x = mulredc (x, x, n, ni);
1489 addmod (x, x, a, n);
1491 submod (t, z, x, n);
1492 P = mulredc (P, t, n, ni);
1494 if (k % 32 == 1)
1496 if (gcd_odd (P, n) != 1)
1497 goto factor_found;
1498 y = x;
1501 while (--k != 0);
1503 z = x;
1504 k = l;
1505 l = 2 * l;
1506 for (unsigned long int i = 0; i < k; i++)
1508 x = mulredc (x, x, n, ni);
1509 addmod (x, x, a, n);
1511 y = x;
1514 factor_found:
1517 y = mulredc (y, y, n, ni);
1518 addmod (y, y, a, n);
1520 submod (t, z, y, n);
1521 g = gcd_odd (t, n);
1523 while (g == 1);
1525 if (n == g)
1527 /* Found n itself as factor. Restart with different params. */
1528 factor_using_pollard_rho (n, a + 1, factors);
1529 return;
1532 n = n / g;
1534 if (!prime_p (g))
1535 factor_using_pollard_rho (g, a + 1, factors);
1536 else
1537 factor_insert (factors, g);
1539 if (prime_p (n))
1541 factor_insert (factors, n);
1542 break;
1545 x = x % n;
1546 z = z % n;
1547 y = y % n;
1551 static void
1552 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1553 struct factors *factors)
1555 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1557 unsigned long int k = 1;
1558 unsigned long int l = 1;
1560 redcify2 (P1, P0, 1, n1, n0);
1561 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1562 y1 = z1 = x1;
1563 y0 = z0 = x0;
1565 while (n1 != 0 || n0 != 1)
1567 binv (ni, n0);
1569 for (;;)
1573 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1574 x1 = r1m;
1575 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1577 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1578 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1579 P1 = r1m;
1581 if (k % 32 == 1)
1583 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1584 if (g1 != 0 || g0 != 1)
1585 goto factor_found;
1586 y1 = x1; y0 = x0;
1589 while (--k != 0);
1591 z1 = x1; z0 = x0;
1592 k = l;
1593 l = 2 * l;
1594 for (unsigned long int i = 0; i < k; i++)
1596 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1597 x1 = r1m;
1598 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1600 y1 = x1; y0 = x0;
1603 factor_found:
1606 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1607 y1 = r1m;
1608 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1610 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1611 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1613 while (g1 == 0 && g0 == 1);
1615 if (g1 == 0)
1617 /* The found factor is one word, and > 1. */
1618 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1620 if (!prime_p (g0))
1621 factor_using_pollard_rho (g0, a + 1, factors);
1622 else
1623 factor_insert (factors, g0);
1625 else
1627 /* The found factor is two words. This is highly unlikely, thus hard
1628 to trigger. Please be careful before you change this code! */
1629 uintmax_t ginv;
1631 if (n1 == g1 && n0 == g0)
1633 /* Found n itself as factor. Restart with different params. */
1634 factor_using_pollard_rho2 (n1, n0, a + 1, factors);
1635 return;
1638 binv (ginv, g0); /* Compute n = n / g. Since the result will */
1639 n0 = ginv * n0; /* fit one word, we can compute the quotient */
1640 n1 = 0; /* modulo B, ignoring the high divisor word. */
1642 if (!prime2_p (g1, g0))
1643 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1644 else
1645 factor_insert_large (factors, g1, g0);
1648 if (n1 == 0)
1650 if (prime_p (n0))
1652 factor_insert (factors, n0);
1653 break;
1656 factor_using_pollard_rho (n0, a, factors);
1657 return;
1660 if (prime2_p (n1, n0))
1662 factor_insert_large (factors, n1, n0);
1663 break;
1666 x0 = mod2 (&x1, x1, x0, n1, n0);
1667 z0 = mod2 (&z1, z1, z0, n1, n0);
1668 y0 = mod2 (&y1, y1, y0, n1, n0);
1672 #if HAVE_GMP
1673 static void
1674 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1675 struct mp_factors *factors)
1677 mpz_t x, z, y, P;
1678 mpz_t t, t2;
1680 devmsg ("[pollard-rho (%lu)] ", a);
1682 mpz_inits (t, t2, NULL);
1683 mpz_init_set_si (y, 2);
1684 mpz_init_set_si (x, 2);
1685 mpz_init_set_si (z, 2);
1686 mpz_init_set_ui (P, 1);
1688 unsigned long long int k = 1;
1689 unsigned long long int l = 1;
1691 while (mpz_cmp_ui (n, 1) != 0)
1693 for (;;)
1697 mpz_mul (t, x, x);
1698 mpz_mod (x, t, n);
1699 mpz_add_ui (x, x, a);
1701 mpz_sub (t, z, x);
1702 mpz_mul (t2, P, t);
1703 mpz_mod (P, t2, n);
1705 if (k % 32 == 1)
1707 mpz_gcd (t, P, n);
1708 if (mpz_cmp_ui (t, 1) != 0)
1709 goto factor_found;
1710 mpz_set (y, x);
1713 while (--k != 0);
1715 mpz_set (z, x);
1716 k = l;
1717 l = 2 * l;
1718 for (unsigned long long int i = 0; i < k; i++)
1720 mpz_mul (t, x, x);
1721 mpz_mod (x, t, n);
1722 mpz_add_ui (x, x, a);
1724 mpz_set (y, x);
1727 factor_found:
1730 mpz_mul (t, y, y);
1731 mpz_mod (y, t, n);
1732 mpz_add_ui (y, y, a);
1734 mpz_sub (t, z, y);
1735 mpz_gcd (t, t, n);
1737 while (mpz_cmp_ui (t, 1) == 0);
1739 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1741 if (!mp_prime_p (t))
1743 devmsg ("[composite factor--restarting pollard-rho] ");
1744 mp_factor_using_pollard_rho (t, a + 1, factors);
1746 else
1748 mp_factor_insert (factors, t);
1751 if (mp_prime_p (n))
1753 mp_factor_insert (factors, n);
1754 break;
1757 mpz_mod (x, x, n);
1758 mpz_mod (z, z, n);
1759 mpz_mod (y, y, n);
1762 mpz_clears (P, t2, t, z, x, y, NULL);
1764 #endif
1766 #if USE_SQUFOF
1767 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1768 algorithm is replaced, consider also returning the remainder. */
1769 static uintmax_t _GL_ATTRIBUTE_CONST
1770 isqrt (uintmax_t n)
1772 uintmax_t x;
1773 unsigned c;
1774 if (n == 0)
1775 return 0;
1777 count_leading_zeros (c, n);
1779 /* Make x > sqrt(n). This will be invariant through the loop. */
1780 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2);
1782 for (;;)
1784 uintmax_t y = (x + n/x) / 2;
1785 if (y >= x)
1786 return x;
1788 x = y;
1792 static uintmax_t _GL_ATTRIBUTE_CONST
1793 isqrt2 (uintmax_t nh, uintmax_t nl)
1795 unsigned int shift;
1796 uintmax_t x;
1798 /* Ensures the remainder fits in an uintmax_t. */
1799 assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1801 if (nh == 0)
1802 return isqrt (nl);
1804 count_leading_zeros (shift, nh);
1805 shift &= ~1;
1807 /* Make x > sqrt(n) */
1808 x = isqrt ( (nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1809 x <<= (W_TYPE_SIZE - shift) / 2;
1811 /* Do we need more than one iteration? */
1812 for (;;)
1814 uintmax_t r _GL_UNUSED;
1815 uintmax_t q, y;
1816 udiv_qrnnd (q, r, nh, nl, x);
1817 y = (x + q) / 2;
1819 if (y >= x)
1821 uintmax_t hi, lo;
1822 umul_ppmm (hi, lo, x + 1, x + 1);
1823 assert (gt2 (hi, lo, nh, nl));
1825 umul_ppmm (hi, lo, x, x);
1826 assert (ge2 (nh, nl, hi, lo));
1827 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1828 assert (hi == 0);
1830 return x;
1833 x = y;
1837 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1838 # define MAGIC64 0x0202021202030213ULL
1839 # define MAGIC63 0x0402483012450293ULL
1840 # define MAGIC65 0x218a019866014613ULL
1841 # define MAGIC11 0x23b
1843 /* Return the square root if the input is a square, otherwise 0. */
1844 static uintmax_t _GL_ATTRIBUTE_CONST
1845 is_square (uintmax_t x)
1847 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1848 computing the square root. */
1849 if (((MAGIC64 >> (x & 63)) & 1)
1850 && ((MAGIC63 >> (x % 63)) & 1)
1851 /* Both 0 and 64 are squares mod (65) */
1852 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1853 && ((MAGIC11 >> (x % 11) & 1)))
1855 uintmax_t r = isqrt (x);
1856 if (r*r == x)
1857 return r;
1859 return 0;
1862 /* invtab[i] = floor(0x10000 / (0x100 + i) */
1863 static const unsigned short invtab[0x81] =
1865 0x200,
1866 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1867 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1868 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1869 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1870 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1871 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1872 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1873 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1874 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1875 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1876 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1877 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1878 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1879 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1880 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1881 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1884 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1885 that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
1886 # define div_smallq(q, r, u, d) \
1887 do { \
1888 if ((u) / 0x40 < (d)) \
1890 int _cnt; \
1891 uintmax_t _dinv, _mask, _q, _r; \
1892 count_leading_zeros (_cnt, (d)); \
1893 _r = (u); \
1894 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1896 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1897 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1899 else \
1901 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1902 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1904 _r -= _q*(d); \
1906 _mask = -(uintmax_t) (_r >= (d)); \
1907 (r) = _r - (_mask & (d)); \
1908 (q) = _q - _mask; \
1909 assert ( (q) * (d) + (r) == u); \
1911 else \
1913 uintmax_t _q = (u) / (d); \
1914 (r) = (u) - _q * (d); \
1915 (q) = _q; \
1917 } while (0)
1919 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1920 = 3025, P = 1737, representing F_{18} = (-6314, 2* 1737, 3025),
1921 with 3025 = 55^2.
1923 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1924 representing G_0 = (-55, 2*4652, 8653).
1926 In the notation of the paper:
1928 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1932 t_0 = floor([q_0 + R_0] / S0) = 1
1933 R_1 = t_0 * S_0 - R_0 = 4001
1934 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1937 /* Multipliers, in order of efficiency:
1938 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1939 0.7317 3*5*7 = 105 = 1
1940 0.7820 3*5*11 = 165 = 1
1941 0.7872 3*5 = 15 = 3
1942 0.8101 3*7*11 = 231 = 3
1943 0.8155 3*7 = 21 = 1
1944 0.8284 5*7*11 = 385 = 1
1945 0.8339 5*7 = 35 = 3
1946 0.8716 3*11 = 33 = 1
1947 0.8774 3 = 3 = 3
1948 0.8913 5*11 = 55 = 3
1949 0.8972 5 = 5 = 1
1950 0.9233 7*11 = 77 = 1
1951 0.9295 7 = 7 = 3
1952 0.9934 11 = 11 = 3
1954 # define QUEUE_SIZE 50
1955 #endif
1957 #if STAT_SQUFOF
1958 # define Q_FREQ_SIZE 50
1959 /* Element 0 keeps the total */
1960 static unsigned int q_freq[Q_FREQ_SIZE + 1];
1961 # define MIN(a,b) ((a) < (b) ? (a) : (b))
1962 #endif
1964 #if USE_SQUFOF
1965 /* Return true on success. Expected to fail only for numbers
1966 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1967 static bool
1968 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1970 /* Uses algorithm and notation from
1972 SQUARE FORM FACTORIZATION
1973 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1975 https://homes.cerias.purdue.edu/~ssw/squfof.pdf
1978 static const unsigned int multipliers_1[] =
1979 { /* = 1 (mod 4) */
1980 105, 165, 21, 385, 33, 5, 77, 1, 0
1982 static const unsigned int multipliers_3[] =
1983 { /* = 3 (mod 4) */
1984 1155, 15, 231, 35, 3, 55, 7, 11, 0
1987 const unsigned int *m;
1989 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1991 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1992 return false;
1994 uintmax_t sqrt_n = isqrt2 (n1, n0);
1996 if (n0 == sqrt_n * sqrt_n)
1998 uintmax_t p1, p0;
2000 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
2001 assert (p0 == n0);
2003 if (n1 == p1)
2005 if (prime_p (sqrt_n))
2006 factor_insert_multiplicity (factors, sqrt_n, 2);
2007 else
2009 struct factors f;
2011 f.nfactors = 0;
2012 if (!factor_using_squfof (0, sqrt_n, &f))
2014 /* Try pollard rho instead */
2015 factor_using_pollard_rho (sqrt_n, 1, &f);
2017 /* Duplicate the new factors */
2018 for (unsigned int i = 0; i < f.nfactors; i++)
2019 factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]);
2021 return true;
2025 /* Select multipliers so we always get n * mu = 3 (mod 4) */
2026 for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2027 *m; m++)
2029 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2030 unsigned int i;
2031 unsigned int mu = *m;
2032 unsigned int qpos = 0;
2034 assert (mu * n0 % 4 == 3);
2036 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2037 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2038 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2039 mu^3 < n.
2041 However, this seems insufficient: With n = 37243139 and mu =
2042 105, we get a trivial factor, from the square 38809 = 197^2,
2043 without any corresponding Q earlier in the iteration.
2045 Requiring 64 mu^3 < n seems sufficient. */
2046 if (n1 == 0)
2048 if ((uintmax_t) mu*mu*mu >= n0 / 64)
2049 continue;
2051 else
2053 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2054 continue;
2056 umul_ppmm (Dh, Dl, n0, mu);
2057 Dh += n1 * mu;
2059 assert (Dl % 4 != 1);
2060 assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2062 S = isqrt2 (Dh, Dl);
2064 Q1 = 1;
2065 P = S;
2067 /* Square root remainder fits in one word, so ignore high part. */
2068 Q = Dl - P*P;
2069 /* FIXME: When can this differ from floor(sqrt(2 sqrt(D)))? */
2070 L = isqrt (2*S);
2071 B = 2*L;
2072 L1 = mu * 2 * L;
2074 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2075 4 D. */
2077 for (i = 0; i <= B; i++)
2079 uintmax_t q, P1, t, rem;
2081 div_smallq (q, rem, S+P, Q);
2082 P1 = S - rem; /* P1 = q*Q - P */
2084 IF_LINT (assert (q > 0 && Q > 0));
2086 # if STAT_SQUFOF
2087 q_freq[0]++;
2088 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2089 # endif
2091 if (Q <= L1)
2093 uintmax_t g = Q;
2095 if ( (Q & 1) == 0)
2096 g /= 2;
2098 g /= gcd_odd (g, mu);
2100 if (g <= L)
2102 if (qpos >= QUEUE_SIZE)
2103 die (EXIT_FAILURE, 0, _("squfof queue overflow"));
2104 queue[qpos].Q = g;
2105 queue[qpos].P = P % g;
2106 qpos++;
2110 /* I think the difference can be either sign, but mod
2111 2^W_TYPE_SIZE arithmetic should be fine. */
2112 t = Q1 + q * (P - P1);
2113 Q1 = Q;
2114 Q = t;
2115 P = P1;
2117 if ( (i & 1) == 0)
2119 uintmax_t r = is_square (Q);
2120 if (r)
2122 for (unsigned int j = 0; j < qpos; j++)
2124 if (queue[j].Q == r)
2126 if (r == 1)
2127 /* Traversed entire cycle. */
2128 goto next_multiplier;
2130 /* Need the absolute value for divisibility test. */
2131 if (P >= queue[j].P)
2132 t = P - queue[j].P;
2133 else
2134 t = queue[j].P - P;
2135 if (t % r == 0)
2137 /* Delete entries up to and including entry
2138 j, which matched. */
2139 memmove (queue, queue + j + 1,
2140 (qpos - j - 1) * sizeof (queue[0]));
2141 qpos -= (j + 1);
2143 goto next_i;
2147 /* We have found a square form, which should give a
2148 factor. */
2149 Q1 = r;
2150 assert (S >= P); /* What signs are possible? */
2151 P += r * ((S - P) / r);
2153 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2154 for the case D = 2N. */
2155 /* Compute Q = (D - P*P) / Q1, but we need double
2156 precision. */
2157 uintmax_t hi, lo;
2158 umul_ppmm (hi, lo, P, P);
2159 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2160 udiv_qrnnd (Q, rem, hi, lo, Q1);
2161 assert (rem == 0);
2163 for (;;)
2165 /* Note: There appears to by a typo in the paper,
2166 Step 4a in the algorithm description says q <--
2167 floor([S+P]/\hat Q), but looking at the equations
2168 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2169 (In this code, \hat Q is Q1). */
2170 div_smallq (q, rem, S+P, Q);
2171 P1 = S - rem; /* P1 = q*Q - P */
2173 # if STAT_SQUFOF
2174 q_freq[0]++;
2175 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2176 # endif
2177 if (P == P1)
2178 break;
2179 t = Q1 + q * (P - P1);
2180 Q1 = Q;
2181 Q = t;
2182 P = P1;
2185 if ( (Q & 1) == 0)
2186 Q /= 2;
2187 Q /= gcd_odd (Q, mu);
2189 assert (Q > 1 && (n1 || Q < n0));
2191 if (prime_p (Q))
2192 factor_insert (factors, Q);
2193 else if (!factor_using_squfof (0, Q, factors))
2194 factor_using_pollard_rho (Q, 2, factors);
2196 divexact_21 (n1, n0, n1, n0, Q);
2198 if (prime2_p (n1, n0))
2199 factor_insert_large (factors, n1, n0);
2200 else
2202 if (!factor_using_squfof (n1, n0, factors))
2204 if (n1 == 0)
2205 factor_using_pollard_rho (n0, 1, factors);
2206 else
2207 factor_using_pollard_rho2 (n1, n0, 1, factors);
2211 return true;
2214 next_i:;
2216 next_multiplier:;
2218 return false;
2220 #endif
2222 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2223 results in FACTORS. */
2224 static void
2225 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2227 factors->nfactors = 0;
2228 factors->plarge[1] = 0;
2230 if (t1 == 0 && t0 < 2)
2231 return;
2233 t0 = factor_using_division (&t1, t1, t0, factors);
2235 if (t1 == 0 && t0 < 2)
2236 return;
2238 if (prime2_p (t1, t0))
2239 factor_insert_large (factors, t1, t0);
2240 else
2242 #if USE_SQUFOF
2243 if (factor_using_squfof (t1, t0, factors))
2244 return;
2245 #endif
2247 if (t1 == 0)
2248 factor_using_pollard_rho (t0, 1, factors);
2249 else
2250 factor_using_pollard_rho2 (t1, t0, 1, factors);
2254 #if HAVE_GMP
2255 /* Use Pollard-rho to compute the prime factors of
2256 arbitrary-precision T, and put the results in FACTORS. */
2257 static void
2258 mp_factor (mpz_t t, struct mp_factors *factors)
2260 mp_factor_init (factors);
2262 if (mpz_sgn (t) != 0)
2264 mp_factor_using_division (t, factors);
2266 if (mpz_cmp_ui (t, 1) != 0)
2268 devmsg ("[is number prime?] ");
2269 if (mp_prime_p (t))
2270 mp_factor_insert (factors, t);
2271 else
2272 mp_factor_using_pollard_rho (t, 1, factors);
2276 #endif
2278 static strtol_error
2279 strto2uintmax (uintmax_t *hip, uintmax_t *lop, const char *s)
2281 unsigned int lo_carry;
2282 uintmax_t hi = 0, lo = 0;
2284 strtol_error err = LONGINT_INVALID;
2286 /* Skip initial spaces and '+'. */
2287 for (;;)
2289 char c = *s;
2290 if (c == ' ')
2291 s++;
2292 else if (c == '+')
2294 s++;
2295 break;
2297 else
2298 break;
2301 /* Initial scan for invalid digits. */
2302 const char *p = s;
2303 for (;;)
2305 unsigned int c = *p++;
2306 if (c == 0)
2307 break;
2309 if (UNLIKELY (!ISDIGIT (c)))
2311 err = LONGINT_INVALID;
2312 break;
2315 err = LONGINT_OK; /* we've seen at least one valid digit */
2318 for (;err == LONGINT_OK;)
2320 unsigned int c = *s++;
2321 if (c == 0)
2322 break;
2324 c -= '0';
2326 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2328 err = LONGINT_OVERFLOW;
2329 break;
2331 hi = 10 * hi;
2333 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2334 lo_carry += 10 * lo < 2 * lo;
2336 lo = 10 * lo;
2337 lo += c;
2339 lo_carry += lo < c;
2340 hi += lo_carry;
2341 if (UNLIKELY (hi < lo_carry))
2343 err = LONGINT_OVERFLOW;
2344 break;
2348 *hip = hi;
2349 *lop = lo;
2351 return err;
2354 /* Structure and routines for buffering and outputting full lines,
2355 to support parallel operation efficiently. */
2356 static struct lbuf_
2358 char *buf;
2359 char *end;
2360 } lbuf;
2362 /* 512 is chosen to give good performance,
2363 and also is the max guaranteed size that
2364 consumers can read atomically through pipes.
2365 Also it's big enough to cater for max line length
2366 even with 128 bit uintmax_t. */
2367 #define FACTOR_PIPE_BUF 512
2369 static void
2370 lbuf_alloc (void)
2372 if (lbuf.buf)
2373 return;
2375 /* Double to ensure enough space for
2376 previous numbers + next number. */
2377 lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2);
2378 lbuf.end = lbuf.buf;
2381 /* Write complete LBUF to standard output. */
2382 static void
2383 lbuf_flush (void)
2385 size_t size = lbuf.end - lbuf.buf;
2386 if (full_write (STDOUT_FILENO, lbuf.buf, size) != size)
2387 die (EXIT_FAILURE, errno, "%s", _("write error"));
2388 lbuf.end = lbuf.buf;
2391 /* Add a character C to LBUF and if it's a newline
2392 and enough bytes are already buffered,
2393 then write atomically to standard output. */
2394 static void
2395 lbuf_putc (char c)
2397 *lbuf.end++ = c;
2399 if (c == '\n')
2401 size_t buffered = lbuf.end - lbuf.buf;
2403 /* Provide immediate output for interactive input. */
2404 static int line_buffered = -1;
2405 if (line_buffered == -1)
2406 line_buffered = isatty (STDIN_FILENO);
2407 if (line_buffered)
2408 lbuf_flush ();
2409 else if (buffered >= FACTOR_PIPE_BUF)
2411 /* Write output in <= PIPE_BUF chunks
2412 so consumers can read atomically. */
2413 char const *tend = lbuf.end;
2415 /* Since a umaxint_t's factors must fit in 512
2416 we're guaranteed to find a newline here. */
2417 char *tlend = lbuf.buf + FACTOR_PIPE_BUF;
2418 while (*--tlend != '\n');
2419 tlend++;
2421 lbuf.end = tlend;
2422 lbuf_flush ();
2424 /* Buffer the remainder. */
2425 memcpy (lbuf.buf, tlend, tend - tlend);
2426 lbuf.end = lbuf.buf + (tend - tlend);
2431 /* Buffer an int to the internal LBUF. */
2432 static void
2433 lbuf_putint (uintmax_t i, size_t min_width)
2435 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2436 char const *umaxstr = umaxtostr (i, buf);
2437 size_t width = sizeof (buf) - (umaxstr - buf) - 1;
2438 size_t z = width;
2440 for (; z < min_width; z++)
2441 *lbuf.end++ = '0';
2443 memcpy (lbuf.end, umaxstr, width);
2444 lbuf.end += width;
2447 static void
2448 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2450 uintmax_t q, r;
2452 if (t1 == 0)
2453 lbuf_putint (t0, 0);
2454 else
2456 /* Use very plain code here since it seems hard to write fast code
2457 without assuming a specific word size. */
2458 q = t1 / 1000000000;
2459 r = t1 % 1000000000;
2460 udiv_qrnnd (t0, r, r, t0, 1000000000);
2461 print_uintmaxes (q, t0);
2462 lbuf_putint (r, 9);
2466 /* Single-precision factoring */
2467 static void
2468 print_factors_single (uintmax_t t1, uintmax_t t0)
2470 struct factors factors;
2472 print_uintmaxes (t1, t0);
2473 lbuf_putc (':');
2475 factor (t1, t0, &factors);
2477 for (unsigned int j = 0; j < factors.nfactors; j++)
2478 for (unsigned int k = 0; k < factors.e[j]; k++)
2480 lbuf_putc (' ');
2481 print_uintmaxes (0, factors.p[j]);
2484 if (factors.plarge[1])
2486 lbuf_putc (' ');
2487 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2490 lbuf_putc ('\n');
2493 /* Emit the factors of the indicated number. If we have the option of using
2494 either algorithm, we select on the basis of the length of the number.
2495 For longer numbers, we prefer the MP algorithm even if the native algorithm
2496 has enough digits, because the algorithm is better. The turnover point
2497 depends on the value. */
2498 static bool
2499 print_factors (const char *input)
2501 uintmax_t t1, t0;
2503 /* Try converting the number to one or two words. If it fails, use GMP or
2504 print an error message. The 2nd condition checks that the most
2505 significant bit of the two-word number is clear, in a typesize neutral
2506 way. */
2507 strtol_error err = strto2uintmax (&t1, &t0, input);
2509 switch (err)
2511 case LONGINT_OK:
2512 if (((t1 << 1) >> 1) == t1)
2514 devmsg ("[using single-precision arithmetic] ");
2515 print_factors_single (t1, t0);
2516 return true;
2518 break;
2520 case LONGINT_OVERFLOW:
2521 /* Try GMP. */
2522 break;
2524 default:
2525 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2526 return false;
2529 #if HAVE_GMP
2530 devmsg ("[using arbitrary-precision arithmetic] ");
2531 mpz_t t;
2532 struct mp_factors factors;
2534 mpz_init_set_str (t, input, 10);
2536 gmp_printf ("%Zd:", t);
2537 mp_factor (t, &factors);
2539 for (unsigned int j = 0; j < factors.nfactors; j++)
2540 for (unsigned int k = 0; k < factors.e[j]; k++)
2541 gmp_printf (" %Zd", factors.p[j]);
2543 mp_factor_clear (&factors);
2544 mpz_clear (t);
2545 putchar ('\n');
2546 fflush (stdout);
2547 return true;
2548 #else
2549 error (0, 0, _("%s is too large"), quote (input));
2550 return false;
2551 #endif
2554 void
2555 usage (int status)
2557 if (status != EXIT_SUCCESS)
2558 emit_try_help ();
2559 else
2561 printf (_("\
2562 Usage: %s [NUMBER]...\n\
2563 or: %s OPTION\n\
2565 program_name, program_name);
2566 fputs (_("\
2567 Print the prime factors of each specified integer NUMBER. If none\n\
2568 are specified on the command line, read them from standard input.\n\
2570 "), stdout);
2571 fputs (HELP_OPTION_DESCRIPTION, stdout);
2572 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2573 emit_ancillary_info (PROGRAM_NAME);
2575 exit (status);
2578 static bool
2579 do_stdin (void)
2581 bool ok = true;
2582 token_buffer tokenbuffer;
2584 init_tokenbuffer (&tokenbuffer);
2586 while (true)
2588 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2589 &tokenbuffer);
2590 if (token_length == (size_t) -1)
2591 break;
2592 ok &= print_factors (tokenbuffer.buffer);
2594 free (tokenbuffer.buffer);
2596 return ok;
2600 main (int argc, char **argv)
2602 initialize_main (&argc, &argv);
2603 set_program_name (argv[0]);
2604 setlocale (LC_ALL, "");
2605 bindtextdomain (PACKAGE, LOCALEDIR);
2606 textdomain (PACKAGE);
2608 lbuf_alloc ();
2609 atexit (close_stdout);
2610 atexit (lbuf_flush);
2612 int c;
2613 while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1)
2615 switch (c)
2617 case DEV_DEBUG_OPTION:
2618 dev_debug = true;
2619 break;
2621 case_GETOPT_HELP_CHAR;
2623 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2625 default:
2626 usage (EXIT_FAILURE);
2630 #if STAT_SQUFOF
2631 memset (q_freq, 0, sizeof (q_freq));
2632 #endif
2634 bool ok;
2635 if (argc <= optind)
2636 ok = do_stdin ();
2637 else
2639 ok = true;
2640 for (int i = optind; i < argc; i++)
2641 if (! print_factors (argv[i]))
2642 ok = false;
2645 #if STAT_SQUFOF
2646 if (q_freq[0] > 0)
2648 double acc_f;
2649 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2650 for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2652 double f = (double) q_freq[i] / q_freq[0];
2653 acc_f += f;
2654 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2655 100.0 * f, 100.0 * acc_f);
2658 #endif
2660 return ok ? EXIT_SUCCESS : EXIT_FAILURE;