doc: sort: be more descriptive than 'manual'
[coreutils.git] / src / factor.c
blob2649e9fc60e1b528d377256e5bd9083422b1a01d
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organization:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Algorithm:
40 (1) Perform trial division using a small primes table, but without hardware
41 division since the primes table store inverses modulo the word base.
42 (The GMP variant of this code doesn't make use of the precomputed
43 inverses, but instead relies on GMP for fast divisibility testing.)
44 (2) Check the nature of any non-factored part using Miller-Rabin for
45 detecting composites, and Lucas for detecting primes.
46 (3) Factor any remaining composite part using the Pollard-Brent rho
47 algorithm or if USE_SQUFOF is defined to 1, try that first.
48 Status of found factors are checked again using Miller-Rabin and Lucas.
50 We prefer using Hensel norm in the divisions, not the more familiar
51 Euclidean norm, since the former leads to much faster code. In the
52 Pollard-Brent rho code and the prime testing code, we use Montgomery's
53 trick of multiplying all n-residues by the word base, allowing cheap Hensel
54 reductions mod n.
56 The GMP code uses an algorithm that can be considerably slower;
57 for example, on a circa-2017 Intel Xeon Silver 4116, factoring
58 2^{127}-3 takes about 50 ms with the two-word algorithm but would
59 take about 750 ms with the GMP code.
61 Improvements:
63 * Use modular inverses also for exact division in the Lucas code, and
64 elsewhere. A problem is to locate the inverses not from an index, but
65 from a prime. We might instead compute the inverse on-the-fly.
67 * Tune trial division table size (not forgetting that this is a standalone
68 program where the table will be read from secondary storage for
69 each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidean)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methods. These functions currently dominate run time when using
84 the -w option.
87 /* Whether to recursively factor to prove primality,
88 or run faster probabilistic tests. */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
93 /* Faster for certain ranges but less general. */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
98 /* Output SQUFOF statistics. */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdio.h>
107 #include <gmp.h>
109 #include "system.h"
110 #include "assure.h"
111 #include "full-write.h"
112 #include "quote.h"
113 #include "readtokens.h"
114 #include "xstrtol.h"
116 /* The official name of this program (e.g., no 'g' prefix). */
117 #define PROGRAM_NAME "factor"
119 #define AUTHORS \
120 proper_name ("Paul Rubin"), \
121 proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
122 proper_name_lite ("Niels Moller", "Niels M\303\266ller")
124 /* Token delimiters when reading from a file. */
125 #define DELIM "\n\t "
127 #ifndef USE_LONGLONG_H
128 /* With the way we use longlong.h, it's only safe to use
129 when UWtype = UHWtype, as there were various cases
130 (as can be seen in the history for longlong.h) where
131 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
132 to avoid compile time or run time issues. */
133 # if LONG_MAX == INTMAX_MAX
134 # define USE_LONGLONG_H 1
135 # endif
136 #endif
138 #if USE_LONGLONG_H
140 /* Make definitions for longlong.h to make it do what it can do for us */
142 /* bitcount for uintmax_t */
143 # if UINTMAX_MAX == UINT32_MAX
144 # define W_TYPE_SIZE 32
145 # elif UINTMAX_MAX == UINT64_MAX
146 # define W_TYPE_SIZE 64
147 # elif UINTMAX_MAX == UINT128_MAX
148 # define W_TYPE_SIZE 128
149 # endif
151 # define UWtype uintmax_t
152 # define UHWtype unsigned long int
153 # undef UDWtype
154 # if HAVE_ATTRIBUTE_MODE
155 typedef unsigned int UQItype __attribute__ ((mode (QI)));
156 typedef int SItype __attribute__ ((mode (SI)));
157 typedef unsigned int USItype __attribute__ ((mode (SI)));
158 typedef int DItype __attribute__ ((mode (DI)));
159 typedef unsigned int UDItype __attribute__ ((mode (DI)));
160 # else
161 typedef unsigned char UQItype;
162 typedef long SItype;
163 typedef unsigned long int USItype;
164 # if HAVE_LONG_LONG_INT
165 typedef long long int DItype;
166 typedef unsigned long long int UDItype;
167 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
168 typedef long int DItype;
169 typedef unsigned long int UDItype;
170 # endif
171 # endif
172 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
173 # define ASSERT(x) /* FIXME make longlong.h really standalone */
174 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
175 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
176 # ifndef __GMP_GNUC_PREREQ
177 # define __GMP_GNUC_PREREQ(a,b) 1
178 # endif
180 /* These stub macros are only used in longlong.h in certain system compiler
181 combinations, so ensure usage to avoid -Wunused-macros warnings. */
182 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
183 ASSERT (1)
184 __GMP_DECLSPEC
185 # endif
187 # if _ARCH_PPC
188 # define HAVE_HOST_CPU_FAMILY_powerpc 1
189 # endif
190 # include "longlong.h"
191 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
192 const unsigned char factor_clz_tab[129] =
194 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
195 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
196 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
197 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
200 # endif
202 #else /* not USE_LONGLONG_H */
204 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
205 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
206 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
207 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
209 #endif
211 #if !defined __clz_tab && !defined UHWtype
212 /* Without this seemingly useless conditional, gcc -Wunused-macros
213 warns that each of the two tested macros is unused on Fedora 18.
214 FIXME: this is just an ugly band-aid. Fix it properly. */
215 #endif
217 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
218 #define MAX_NFACTS 26
220 enum
222 DEV_DEBUG_OPTION = CHAR_MAX + 1
225 static struct option const long_options[] =
227 {"exponents", no_argument, nullptr, 'h'},
228 {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION},
229 {GETOPT_HELP_OPTION_DECL},
230 {GETOPT_VERSION_OPTION_DECL},
231 {nullptr, 0, nullptr, 0}
234 /* If true, use p^e output format. */
235 static bool print_exponents;
237 struct factors
239 uintmax_t plarge[2]; /* Can have a single large factor */
240 uintmax_t p[MAX_NFACTS];
241 unsigned char e[MAX_NFACTS];
242 unsigned char nfactors;
245 struct mp_factors
247 mpz_t *p;
248 unsigned long int *e;
249 idx_t nfactors;
250 idx_t nalloc;
253 static void factor (uintmax_t, uintmax_t, struct factors *);
255 #ifndef umul_ppmm
256 # define umul_ppmm(w1, w0, u, v) \
257 do { \
258 uintmax_t __x0, __x1, __x2, __x3; \
259 unsigned long int __ul, __vl, __uh, __vh; \
260 uintmax_t __u = (u), __v = (v); \
262 __ul = __ll_lowpart (__u); \
263 __uh = __ll_highpart (__u); \
264 __vl = __ll_lowpart (__v); \
265 __vh = __ll_highpart (__v); \
267 __x0 = (uintmax_t) __ul * __vl; \
268 __x1 = (uintmax_t) __ul * __vh; \
269 __x2 = (uintmax_t) __uh * __vl; \
270 __x3 = (uintmax_t) __uh * __vh; \
272 __x1 += __ll_highpart (__x0);/* This can't give carry. */ \
273 __x1 += __x2; /* But this indeed can. */ \
274 if (__x1 < __x2) /* Did we get it? */ \
275 __x3 += __ll_B; /* Yes, add it in the proper pos. */ \
277 (w1) = __x3 + __ll_highpart (__x1); \
278 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
279 } while (0)
280 #endif
282 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
283 /* Define our own, not needing normalization. This function is
284 currently not performance critical, so keep it simple. Similar to
285 the mod macro below. */
286 # undef udiv_qrnnd
287 # define udiv_qrnnd(q, r, n1, n0, d) \
288 do { \
289 uintmax_t __d1, __d0, __q, __r1, __r0; \
291 __d1 = (d); __d0 = 0; \
292 __r1 = (n1); __r0 = (n0); \
293 affirm (__r1 < __d1); \
294 __q = 0; \
295 for (int __i = W_TYPE_SIZE; __i > 0; __i--) \
297 rsh2 (__d1, __d0, __d1, __d0, 1); \
298 __q <<= 1; \
299 if (ge2 (__r1, __r0, __d1, __d0)) \
301 __q++; \
302 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
305 (r) = __r0; \
306 (q) = __q; \
307 } while (0)
308 #endif
310 #if !defined add_ssaaaa
311 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
312 do { \
313 uintmax_t _add_x; \
314 _add_x = (al) + (bl); \
315 (sh) = (ah) + (bh) + (_add_x < (al)); \
316 (sl) = _add_x; \
317 } while (0)
318 #endif
320 #define rsh2(rh, rl, ah, al, cnt) \
321 do { \
322 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
323 (rh) = (ah) >> (cnt); \
324 } while (0)
326 #define lsh2(rh, rl, ah, al, cnt) \
327 do { \
328 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
329 (rl) = (al) << (cnt); \
330 } while (0)
332 #define ge2(ah, al, bh, bl) \
333 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
335 #define gt2(ah, al, bh, bl) \
336 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
338 #ifndef sub_ddmmss
339 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
340 do { \
341 uintmax_t _cy; \
342 _cy = (al) < (bl); \
343 (rl) = (al) - (bl); \
344 (rh) = (ah) - (bh) - _cy; \
345 } while (0)
346 #endif
348 #ifndef count_leading_zeros
349 # define count_leading_zeros(count, x) do { \
350 uintmax_t __clz_x = (x); \
351 int __clz_c; \
352 for (__clz_c = 0; \
353 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
354 __clz_c += 8) \
355 __clz_x <<= 8; \
356 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
357 __clz_x <<= 1; \
358 (count) = __clz_c; \
359 } while (0)
360 #endif
362 #ifndef count_trailing_zeros
363 # define count_trailing_zeros(count, x) do { \
364 uintmax_t __ctz_x = (x); \
365 int __ctz_c = 0; \
366 while ((__ctz_x & 1) == 0) \
368 __ctz_x >>= 1; \
369 __ctz_c++; \
371 (count) = __ctz_c; \
372 } while (0)
373 #endif
375 /* Requires that a < n and b <= n */
376 #define submod(r,a,b,n) \
377 do { \
378 uintmax_t _t = - (uintmax_t) (a < b); \
379 (r) = ((n) & _t) + (a) - (b); \
380 } while (0)
382 #define addmod(r,a,b,n) \
383 submod ((r), (a), ((n) - (b)), (n))
385 /* Modular two-word addition and subtraction. For performance reasons, the
386 most significant bit of n1 must be clear. The destination variables must be
387 distinct from the mod operand. */
388 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
389 do { \
390 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
391 if (ge2 ((r1), (r0), (n1), (n0))) \
392 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
393 } while (0)
394 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
395 do { \
396 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
397 if ((intmax_t) (r1) < 0) \
398 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
399 } while (0)
401 #define HIGHBIT_TO_MASK(x) \
402 (((intmax_t)-1 >> 1) < 0 \
403 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
404 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
405 ? UINTMAX_MAX : (uintmax_t) 0))
407 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
408 Requires that d1 != 0. */
409 static uintmax_t
410 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
412 int cntd, cnta;
414 affirm (d1 != 0);
416 if (a1 == 0)
418 *r1 = 0;
419 return a0;
422 count_leading_zeros (cntd, d1);
423 count_leading_zeros (cnta, a1);
424 int cnt = cntd - cnta;
425 lsh2 (d1, d0, d1, d0, cnt);
426 for (int i = 0; i < cnt; i++)
428 if (ge2 (a1, a0, d1, d0))
429 sub_ddmmss (a1, a0, a1, a0, d1, d0);
430 rsh2 (d1, d0, d1, d0, 1);
433 *r1 = a1;
434 return a0;
437 ATTRIBUTE_CONST
438 static uintmax_t
439 gcd_odd (uintmax_t a, uintmax_t b)
441 if ((b & 1) == 0)
443 uintmax_t t = b;
444 b = a;
445 a = t;
447 if (a == 0)
448 return b;
450 /* Take out least significant one bit, to make room for sign */
451 b >>= 1;
453 for (;;)
455 uintmax_t t;
456 uintmax_t bgta;
458 while ((a & 1) == 0)
459 a >>= 1;
460 a >>= 1;
462 t = a - b;
463 if (t == 0)
464 return (a << 1) + 1;
466 bgta = HIGHBIT_TO_MASK (t);
468 /* b <-- min (a, b) */
469 b += (bgta & t);
471 /* a <-- |a - b| */
472 a = (t ^ bgta) - bgta;
476 static uintmax_t
477 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
479 affirm (b0 & 1);
481 if ((a0 | a1) == 0)
483 *r1 = b1;
484 return b0;
487 while ((a0 & 1) == 0)
488 rsh2 (a1, a0, a1, a0, 1);
490 for (;;)
492 if ((b1 | a1) == 0)
494 *r1 = 0;
495 return gcd_odd (b0, a0);
498 if (gt2 (a1, a0, b1, b0))
500 sub_ddmmss (a1, a0, a1, a0, b1, b0);
502 rsh2 (a1, a0, a1, a0, 1);
503 while ((a0 & 1) == 0);
505 else if (gt2 (b1, b0, a1, a0))
507 sub_ddmmss (b1, b0, b1, b0, a1, a0);
509 rsh2 (b1, b0, b1, b0, 1);
510 while ((b0 & 1) == 0);
512 else
513 break;
516 *r1 = a1;
517 return a0;
520 static void
521 factor_insert_multiplicity (struct factors *factors,
522 uintmax_t prime, int m)
524 int nfactors = factors->nfactors;
525 uintmax_t *p = factors->p;
526 unsigned char *e = factors->e;
528 /* Locate position for insert new or increment e. */
529 int i;
530 for (i = nfactors - 1; i >= 0; i--)
532 if (p[i] <= prime)
533 break;
536 if (i < 0 || p[i] != prime)
538 for (int j = nfactors - 1; j > i; j--)
540 p[j + 1] = p[j];
541 e[j + 1] = e[j];
543 p[i + 1] = prime;
544 e[i + 1] = m;
545 factors->nfactors = nfactors + 1;
547 else
549 e[i] += m;
553 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
555 static void
556 factor_insert_large (struct factors *factors,
557 uintmax_t p1, uintmax_t p0)
559 if (p1 > 0)
561 affirm (factors->plarge[1] == 0);
562 factors->plarge[0] = p0;
563 factors->plarge[1] = p1;
565 else
566 factor_insert (factors, p0);
569 #ifndef mpz_inits
571 # include <stdarg.h>
573 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
574 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
576 static void
577 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
579 va_list ap;
581 va_start (ap, mpz_single_init);
583 mpz_t *mpz;
584 while ((mpz = va_arg (ap, mpz_t *)))
585 mpz_single_init (*mpz);
587 va_end (ap);
589 #endif
591 static void mp_factor (mpz_t, struct mp_factors *);
593 static void
594 mp_factor_init (struct mp_factors *factors)
596 factors->p = nullptr;
597 factors->e = nullptr;
598 factors->nfactors = 0;
599 factors->nalloc = 0;
602 static void
603 mp_factor_clear (struct mp_factors *factors)
605 for (idx_t i = 0; i < factors->nfactors; i++)
606 mpz_clear (factors->p[i]);
608 free (factors->p);
609 free (factors->e);
612 static void
613 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
615 idx_t nfactors = factors->nfactors;
616 mpz_t *p = factors->p;
617 unsigned long int *e = factors->e;
618 ptrdiff_t i;
620 /* Locate position for insert new or increment e. */
621 for (i = nfactors - 1; i >= 0; i--)
623 if (mpz_cmp (p[i], prime) <= 0)
624 break;
627 if (i < 0 || mpz_cmp (p[i], prime) != 0)
629 if (factors->nfactors == factors->nalloc)
631 p = xpalloc (p, &factors->nalloc, 1, -1, sizeof *p);
632 e = xireallocarray (e, factors->nalloc, sizeof *e);
635 mpz_init (p[nfactors]);
636 for (ptrdiff_t j = nfactors - 1; j > i; j--)
638 mpz_set (p[j + 1], p[j]);
639 e[j + 1] = e[j];
641 mpz_set (p[i + 1], prime);
642 e[i + 1] = 1;
644 factors->p = p;
645 factors->e = e;
646 factors->nfactors = nfactors + 1;
648 else
650 e[i] += 1;
654 static void
655 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
657 mpz_t pz;
659 mpz_init_set_ui (pz, prime);
660 mp_factor_insert (factors, pz);
661 mpz_clear (pz);
665 /* Number of bits in an uintmax_t. */
666 enum { W = sizeof (uintmax_t) * CHAR_BIT };
668 /* Verify that uintmax_t does not have holes in its representation. */
669 static_assert (UINTMAX_MAX >> (W - 1) == 1);
671 #define P(a,b,c,d) a,
672 static const unsigned char primes_diff[] = {
673 #include "primes.h"
674 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
676 #undef P
678 #define PRIMES_PTAB_ENTRIES \
679 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
681 #define P(a,b,c,d) b,
682 static const unsigned char primes_diff8[] = {
683 #include "primes.h"
684 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
686 #undef P
688 struct primes_dtab
690 uintmax_t binv, lim;
693 #define P(a,b,c,d) {c,d},
694 static const struct primes_dtab primes_dtab[] = {
695 #include "primes.h"
696 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
698 #undef P
700 /* Verify that uintmax_t is not wider than
701 the integers used to generate primes.h. */
702 static_assert (W <= WIDE_UINT_BITS);
704 /* debugging for developers. Enables devmsg().
705 This flag is used only in the GMP code. */
706 static bool dev_debug = false;
708 /* Prove primality or run probabilistic tests. */
709 static bool flag_prove_primality = PROVE_PRIMALITY;
711 /* Number of Miller-Rabin tests to run when not proving primality. */
712 #define MR_REPS 25
714 static void
715 factor_insert_refind (struct factors *factors, uintmax_t p, int i, int off)
717 for (int j = 0; j < off; j++)
718 p += primes_diff[i + j];
719 factor_insert (factors, p);
722 /* Trial division with odd primes uses the following trick.
724 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
725 consider the case t < B (this is the second loop below).
727 From our tables we get
729 binv = p^{-1} (mod B)
730 lim = floor ((B-1) / p).
732 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
733 (and all quotients in this range occur for some t).
735 Then t = q * p is true also (mod B), and p is invertible we get
737 q = t * binv (mod B).
739 Next, assume that t is *not* divisible by p. Since multiplication
740 by binv (mod B) is a one-to-one mapping,
742 t * binv (mod B) > lim,
744 because all the smaller values are already taken.
746 This can be summed up by saying that the function
748 q(t) = binv * t (mod B)
750 is a permutation of the range 0 <= t < B, with the curious property
751 that it maps the multiples of p onto the range 0 <= q <= lim, in
752 order, and the non-multiples of p onto the range lim < q < B.
755 static uintmax_t
756 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
757 struct factors *factors)
759 if (t0 % 2 == 0)
761 int cnt;
763 if (t0 == 0)
765 count_trailing_zeros (cnt, t1);
766 t0 = t1 >> cnt;
767 t1 = 0;
768 cnt += W_TYPE_SIZE;
770 else
772 count_trailing_zeros (cnt, t0);
773 rsh2 (t1, t0, t1, t0, cnt);
776 factor_insert_multiplicity (factors, 2, cnt);
779 uintmax_t p = 3;
780 idx_t i;
781 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
783 for (;;)
785 uintmax_t q1, q0, hi;
786 MAYBE_UNUSED uintmax_t lo;
788 q0 = t0 * primes_dtab[i].binv;
789 umul_ppmm (hi, lo, q0, p);
790 if (hi > t1)
791 break;
792 hi = t1 - hi;
793 q1 = hi * primes_dtab[i].binv;
794 if (LIKELY (q1 > primes_dtab[i].lim))
795 break;
796 t1 = q1; t0 = q0;
797 factor_insert (factors, p);
799 p += primes_diff[i + 1];
801 if (t1p)
802 *t1p = t1;
804 #define DIVBLOCK(I) \
805 do { \
806 for (;;) \
808 q = t0 * pd[I].binv; \
809 if (LIKELY (q > pd[I].lim)) \
810 break; \
811 t0 = q; \
812 factor_insert_refind (factors, p, i + 1, I); \
814 } while (0)
816 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
818 uintmax_t q;
819 const struct primes_dtab *pd = &primes_dtab[i];
820 DIVBLOCK (0);
821 DIVBLOCK (1);
822 DIVBLOCK (2);
823 DIVBLOCK (3);
824 DIVBLOCK (4);
825 DIVBLOCK (5);
826 DIVBLOCK (6);
827 DIVBLOCK (7);
829 p += primes_diff8[i];
830 if (p * p > t0)
831 break;
834 return t0;
837 static void
838 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
840 mpz_t q;
841 mp_bitcnt_t p;
843 devmsg ("[trial division] ");
845 mpz_init (q);
847 p = mpz_scan1 (t, 0);
848 mpz_fdiv_q_2exp (t, t, p);
849 while (p)
851 mp_factor_insert_ui (factors, 2);
852 --p;
855 unsigned long int d = 3;
856 for (idx_t i = 1; i <= PRIMES_PTAB_ENTRIES;)
858 if (! mpz_divisible_ui_p (t, d))
860 d += primes_diff[i++];
861 if (mpz_cmp_ui (t, d * d) < 0)
862 break;
864 else
866 mpz_tdiv_q_ui (t, t, d);
867 mp_factor_insert_ui (factors, d);
871 mpz_clear (q);
874 /* Entry i contains (2i+1)^(-1) mod 2^8. */
875 static const unsigned char binvert_table[128] =
877 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
878 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
879 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
880 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
881 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
882 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
883 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
884 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
885 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
886 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
887 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
888 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
889 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
890 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
891 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
892 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
895 /* Compute n^(-1) mod B, using a Newton iteration. */
896 #define binv(inv,n) \
897 do { \
898 uintmax_t __n = (n); \
899 uintmax_t __inv; \
901 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
902 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
903 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
904 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
906 if (W_TYPE_SIZE > 64) \
908 int __invbits = 64; \
909 do { \
910 __inv = 2 * __inv - __inv * __inv * __n; \
911 __invbits *= 2; \
912 } while (__invbits < W_TYPE_SIZE); \
915 (inv) = __inv; \
916 } while (0)
918 /* q = u / d, assuming d|u. */
919 #define divexact_21(q1, q0, u1, u0, d) \
920 do { \
921 uintmax_t _di, _q0; \
922 binv (_di, (d)); \
923 _q0 = (u0) * _di; \
924 if ((u1) >= (d)) \
926 uintmax_t _p1; \
927 MAYBE_UNUSED intmax_t _p0; \
928 umul_ppmm (_p1, _p0, _q0, d); \
929 (q1) = ((u1) - _p1) * _di; \
930 (q0) = _q0; \
932 else \
934 (q0) = _q0; \
935 (q1) = 0; \
937 } while (0)
939 /* x B (mod n). */
940 #define redcify(r_prim, r, n) \
941 do { \
942 MAYBE_UNUSED uintmax_t _redcify_q; \
943 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
944 } while (0)
946 /* x B^2 (mod n). Requires x > 0, n1 < B/2. */
947 #define redcify2(r1, r0, x, n1, n0) \
948 do { \
949 uintmax_t _r1, _r0, _i; \
950 if ((x) < (n1)) \
952 _r1 = (x); _r0 = 0; \
953 _i = W_TYPE_SIZE; \
955 else \
957 _r1 = 0; _r0 = (x); \
958 _i = 2 * W_TYPE_SIZE; \
960 while (_i-- > 0) \
962 lsh2 (_r1, _r0, _r1, _r0, 1); \
963 if (ge2 (_r1, _r0, (n1), (n0))) \
964 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
966 (r1) = _r1; \
967 (r0) = _r0; \
968 } while (0)
970 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
971 Both a and b must be in redc form, the result will be in redc form too. */
972 static inline uintmax_t
973 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
975 uintmax_t rh, rl, q, th, xh;
976 MAYBE_UNUSED uintmax_t tl;
978 umul_ppmm (rh, rl, a, b);
979 q = rl * mi;
980 umul_ppmm (th, tl, q, m);
981 xh = rh - th;
982 if (rh < th)
983 xh += m;
985 return xh;
988 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
989 Both a and b must be in redc form, the result will be in redc form too.
990 For performance reasons, the most significant bit of m must be clear. */
991 static uintmax_t
992 mulredc2 (uintmax_t *r1p,
993 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
994 uintmax_t m1, uintmax_t m0, uintmax_t mi)
996 uintmax_t r1, r0, q, p1, t1, t0, s1, s0;
997 MAYBE_UNUSED uintmax_t p0;
998 mi = -mi;
999 affirm ((a1 >> (W_TYPE_SIZE - 1)) == 0);
1000 affirm ((b1 >> (W_TYPE_SIZE - 1)) == 0);
1001 affirm ((m1 >> (W_TYPE_SIZE - 1)) == 0);
1003 /* First compute a0 * <b1, b0> B^{-1}
1004 +-----+
1005 |a0 b0|
1006 +--+--+--+
1007 |a0 b1|
1008 +--+--+--+
1009 |q0 m0|
1010 +--+--+--+
1011 |q0 m1|
1012 -+--+--+--+
1013 |r1|r0| 0|
1014 +--+--+--+
1016 umul_ppmm (t1, t0, a0, b0);
1017 umul_ppmm (r1, r0, a0, b1);
1018 q = mi * t0;
1019 umul_ppmm (p1, p0, q, m0);
1020 umul_ppmm (s1, s0, q, m1);
1021 r0 += (t0 != 0); /* Carry */
1022 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1023 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1024 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1026 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1027 +-----+
1028 |a1 b0|
1029 +--+--+
1030 |r1|r0|
1031 +--+--+--+
1032 |a1 b1|
1033 +--+--+--+
1034 |q1 m0|
1035 +--+--+--+
1036 |q1 m1|
1037 -+--+--+--+
1038 |r1|r0| 0|
1039 +--+--+--+
1041 umul_ppmm (t1, t0, a1, b0);
1042 umul_ppmm (s1, s0, a1, b1);
1043 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1044 q = mi * t0;
1045 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1046 umul_ppmm (p1, p0, q, m0);
1047 umul_ppmm (s1, s0, q, m1);
1048 r0 += (t0 != 0); /* Carry */
1049 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1050 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1051 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1053 if (ge2 (r1, r0, m1, m0))
1054 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1056 *r1p = r1;
1057 return r0;
1060 ATTRIBUTE_CONST
1061 static uintmax_t
1062 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1064 uintmax_t y = one;
1066 if (e & 1)
1067 y = b;
1069 while (e != 0)
1071 b = mulredc (b, b, n, ni);
1072 e >>= 1;
1074 if (e & 1)
1075 y = mulredc (y, b, n, ni);
1078 return y;
1081 static uintmax_t
1082 powm2 (uintmax_t *r1m,
1083 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1084 uintmax_t ni, const uintmax_t *one)
1086 uintmax_t r1, r0, b1, b0, n1, n0;
1087 int i;
1088 uintmax_t e;
1090 b0 = bp[0];
1091 b1 = bp[1];
1092 n0 = np[0];
1093 n1 = np[1];
1095 r0 = one[0];
1096 r1 = one[1];
1098 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1100 if (e & 1)
1102 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1103 r1 = *r1m;
1105 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1106 b1 = *r1m;
1108 for (e = ep[1]; e > 0; e >>= 1)
1110 if (e & 1)
1112 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1113 r1 = *r1m;
1115 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1116 b1 = *r1m;
1118 *r1m = r1;
1119 return r0;
1122 ATTRIBUTE_CONST
1123 static bool
1124 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1125 int k, uintmax_t one)
1127 uintmax_t y = powm (b, q, n, ni, one);
1129 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1131 if (y == one || y == nm1)
1132 return true;
1134 for (int i = 1; i < k; i++)
1136 y = mulredc (y, y, n, ni);
1138 if (y == nm1)
1139 return true;
1140 if (y == one)
1141 return false;
1143 return false;
1146 ATTRIBUTE_PURE static bool
1147 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1148 const uintmax_t *qp, int k, const uintmax_t *one)
1150 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1152 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1153 y1 = r1m;
1155 if (y0 == one[0] && y1 == one[1])
1156 return true;
1158 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1160 if (y0 == nm1_0 && y1 == nm1_1)
1161 return true;
1163 for (int i = 1; i < k; i++)
1165 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1166 y1 = r1m;
1168 if (y0 == nm1_0 && y1 == nm1_1)
1169 return true;
1170 if (y0 == one[0] && y1 == one[1])
1171 return false;
1173 return false;
1176 static bool
1177 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1178 mpz_srcptr q, mp_bitcnt_t k)
1180 mpz_powm (y, x, q, n);
1182 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1183 return true;
1185 for (mp_bitcnt_t i = 1; i < k; i++)
1187 mpz_powm_ui (y, y, 2, n);
1188 if (mpz_cmp (y, nm1) == 0)
1189 return true;
1190 if (mpz_cmp_ui (y, 1) == 0)
1191 return false;
1193 return false;
1196 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1197 have been observed. The average seem to be about 2. */
1198 static bool ATTRIBUTE_PURE
1199 prime_p (uintmax_t n)
1201 mp_bitcnt_t k;
1202 bool is_prime;
1203 uintmax_t a_prim, one, ni;
1204 struct factors factors;
1206 if (n <= 1)
1207 return false;
1209 /* We have already cast out small primes. */
1210 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1211 return true;
1213 /* Precomputation for Miller-Rabin. */
1214 uintmax_t q = n - 1;
1215 for (k = 0; (q & 1) == 0; k++)
1216 q >>= 1;
1218 uintmax_t a = 2;
1219 binv (ni, n); /* ni <- 1/n mod B */
1220 redcify (one, 1, n);
1221 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1223 /* Perform a Miller-Rabin test, finds most composites quickly. */
1224 if (!millerrabin (n, ni, a_prim, q, k, one))
1225 return false;
1227 if (flag_prove_primality)
1229 /* Factor n-1 for Lucas. */
1230 factor (0, n - 1, &factors);
1233 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1234 number composite. */
1235 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1237 if (flag_prove_primality)
1239 is_prime = true;
1240 for (int i = 0; i < factors.nfactors && is_prime; i++)
1242 is_prime
1243 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1246 else
1248 /* After enough Miller-Rabin runs, be content. */
1249 is_prime = (r == MR_REPS - 1);
1252 if (is_prime)
1253 return true;
1255 a += primes_diff[r]; /* Establish new base. */
1257 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1258 on most processors, since it avoids udiv_qrnnd. If we go down the
1259 udiv_qrnnd_preinv path, this code should be replaced. */
1261 uintmax_t s1, s0;
1262 umul_ppmm (s1, s0, one, a);
1263 if (LIKELY (s1 == 0))
1264 a_prim = s0 % n;
1265 else
1267 MAYBE_UNUSED uintmax_t dummy;
1268 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1272 if (!millerrabin (n, ni, a_prim, q, k, one))
1273 return false;
1276 affirm (!"Lucas prime test failure. This should not happen");
1279 static bool ATTRIBUTE_PURE
1280 prime2_p (uintmax_t n1, uintmax_t n0)
1282 uintmax_t q[2], nm1[2];
1283 uintmax_t a_prim[2];
1284 uintmax_t one[2];
1285 uintmax_t na[2];
1286 uintmax_t ni;
1287 int k;
1288 struct factors factors;
1290 if (n1 == 0)
1291 return prime_p (n0);
1293 nm1[1] = n1 - (n0 == 0);
1294 nm1[0] = n0 - 1;
1295 if (nm1[0] == 0)
1297 count_trailing_zeros (k, nm1[1]);
1299 q[0] = nm1[1] >> k;
1300 q[1] = 0;
1301 k += W_TYPE_SIZE;
1303 else
1305 count_trailing_zeros (k, nm1[0]);
1306 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1309 uintmax_t a = 2;
1310 binv (ni, n0);
1311 redcify2 (one[1], one[0], 1, n1, n0);
1312 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1314 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1315 na[0] = n0;
1316 na[1] = n1;
1318 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1319 return false;
1321 if (flag_prove_primality)
1323 /* Factor n-1 for Lucas. */
1324 factor (nm1[1], nm1[0], &factors);
1327 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1328 number composite. */
1329 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1331 bool is_prime;
1332 uintmax_t e[2], y[2];
1334 if (flag_prove_primality)
1336 is_prime = true;
1337 if (factors.plarge[1])
1339 uintmax_t pi;
1340 binv (pi, factors.plarge[0]);
1341 e[0] = pi * nm1[0];
1342 e[1] = 0;
1343 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1344 is_prime = (y[0] != one[0] || y[1] != one[1]);
1346 for (int i = 0; i < factors.nfactors && is_prime; i++)
1348 /* FIXME: We always have the factor 2. Do we really need to
1349 handle it here? We have done the same powering as part
1350 of millerrabin. */
1351 if (factors.p[i] == 2)
1352 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1353 else
1354 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1355 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1356 is_prime = (y[0] != one[0] || y[1] != one[1]);
1359 else
1361 /* After enough Miller-Rabin runs, be content. */
1362 is_prime = (r == MR_REPS - 1);
1365 if (is_prime)
1366 return true;
1368 a += primes_diff[r]; /* Establish new base. */
1369 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1371 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1372 return false;
1375 affirm (!"Lucas prime test failure. This should not happen");
1378 static bool
1379 mp_prime_p (mpz_t n)
1381 bool is_prime;
1382 mpz_t q, a, nm1, tmp;
1383 struct mp_factors factors;
1385 if (mpz_cmp_ui (n, 1) <= 0)
1386 return false;
1388 /* We have already cast out small primes. */
1389 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1390 return true;
1392 mpz_inits (q, a, nm1, tmp, nullptr);
1394 /* Precomputation for Miller-Rabin. */
1395 mpz_sub_ui (nm1, n, 1);
1397 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1398 mp_bitcnt_t k = mpz_scan1 (nm1, 0);
1399 mpz_tdiv_q_2exp (q, nm1, k);
1401 mpz_set_ui (a, 2);
1403 /* Perform a Miller-Rabin test, finds most composites quickly. */
1404 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1406 is_prime = false;
1407 goto ret2;
1410 if (flag_prove_primality)
1412 /* Factor n-1 for Lucas. */
1413 mpz_set (tmp, nm1);
1414 mp_factor (tmp, &factors);
1417 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1418 number composite. */
1419 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1421 if (flag_prove_primality)
1423 is_prime = true;
1424 for (idx_t i = 0; i < factors.nfactors && is_prime; i++)
1426 mpz_divexact (tmp, nm1, factors.p[i]);
1427 mpz_powm (tmp, a, tmp, n);
1428 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1431 else
1433 /* After enough Miller-Rabin runs, be content. */
1434 is_prime = (r == MR_REPS - 1);
1437 if (is_prime)
1438 goto ret1;
1440 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1442 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1444 is_prime = false;
1445 goto ret1;
1449 affirm (!"Lucas prime test failure. This should not happen");
1451 ret1:
1452 if (flag_prove_primality)
1453 mp_factor_clear (&factors);
1454 ret2:
1455 mpz_clears (q, a, nm1, tmp, nullptr);
1457 return is_prime;
1460 static void
1461 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1462 struct factors *factors)
1464 uintmax_t x, z, y, P, t, ni, g;
1466 unsigned long int k = 1;
1467 unsigned long int l = 1;
1469 redcify (P, 1, n);
1470 addmod (x, P, P, n); /* i.e., redcify(2) */
1471 y = z = x;
1473 while (n != 1)
1475 affirm (a < n);
1477 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1479 for (;;)
1483 x = mulredc (x, x, n, ni);
1484 addmod (x, x, a, n);
1486 submod (t, z, x, n);
1487 P = mulredc (P, t, n, ni);
1489 if (k % 32 == 1)
1491 if (gcd_odd (P, n) != 1)
1492 goto factor_found;
1493 y = x;
1496 while (--k != 0);
1498 z = x;
1499 k = l;
1500 l = 2 * l;
1501 for (unsigned long int i = 0; i < k; i++)
1503 x = mulredc (x, x, n, ni);
1504 addmod (x, x, a, n);
1506 y = x;
1509 factor_found:
1512 y = mulredc (y, y, n, ni);
1513 addmod (y, y, a, n);
1515 submod (t, z, y, n);
1516 g = gcd_odd (t, n);
1518 while (g == 1);
1520 if (n == g)
1522 /* Found n itself as factor. Restart with different params. */
1523 factor_using_pollard_rho (n, a + 1, factors);
1524 return;
1527 n = n / g;
1529 if (!prime_p (g))
1530 factor_using_pollard_rho (g, a + 1, factors);
1531 else
1532 factor_insert (factors, g);
1534 if (prime_p (n))
1536 factor_insert (factors, n);
1537 break;
1540 x = x % n;
1541 z = z % n;
1542 y = y % n;
1546 static void
1547 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1548 struct factors *factors)
1550 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1552 unsigned long int k = 1;
1553 unsigned long int l = 1;
1555 redcify2 (P1, P0, 1, n1, n0);
1556 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1557 y1 = z1 = x1;
1558 y0 = z0 = x0;
1560 while (n1 != 0 || n0 != 1)
1562 binv (ni, n0);
1564 for (;;)
1568 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1569 x1 = r1m;
1570 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1572 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1573 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1574 P1 = r1m;
1576 if (k % 32 == 1)
1578 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1579 if (g1 != 0 || g0 != 1)
1580 goto factor_found;
1581 y1 = x1; y0 = x0;
1584 while (--k != 0);
1586 z1 = x1; z0 = x0;
1587 k = l;
1588 l = 2 * l;
1589 for (unsigned long int i = 0; i < k; i++)
1591 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1592 x1 = r1m;
1593 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1595 y1 = x1; y0 = x0;
1598 factor_found:
1601 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1602 y1 = r1m;
1603 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1605 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1606 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1608 while (g1 == 0 && g0 == 1);
1610 if (g1 == 0)
1612 /* The found factor is one word, and > 1. */
1613 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1615 if (!prime_p (g0))
1616 factor_using_pollard_rho (g0, a + 1, factors);
1617 else
1618 factor_insert (factors, g0);
1620 else
1622 /* The found factor is two words. This is highly unlikely, thus hard
1623 to trigger. Please be careful before you change this code! */
1624 uintmax_t ginv;
1626 if (n1 == g1 && n0 == g0)
1628 /* Found n itself as factor. Restart with different params. */
1629 factor_using_pollard_rho2 (n1, n0, a + 1, factors);
1630 return;
1633 /* Compute n = n / g. Since the result will fit one word,
1634 we can compute the quotient modulo B, ignoring the high
1635 divisor word. */
1636 binv (ginv, g0);
1637 n0 = ginv * n0;
1638 n1 = 0;
1640 if (!prime2_p (g1, g0))
1641 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1642 else
1643 factor_insert_large (factors, g1, g0);
1646 if (n1 == 0)
1648 if (prime_p (n0))
1650 factor_insert (factors, n0);
1651 break;
1654 factor_using_pollard_rho (n0, a, factors);
1655 return;
1658 if (prime2_p (n1, n0))
1660 factor_insert_large (factors, n1, n0);
1661 break;
1664 x0 = mod2 (&x1, x1, x0, n1, n0);
1665 z0 = mod2 (&z1, z1, z0, n1, n0);
1666 y0 = mod2 (&y1, y1, y0, n1, n0);
1670 static void
1671 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1672 struct mp_factors *factors)
1674 mpz_t x, z, y, P;
1675 mpz_t t, t2;
1677 devmsg ("[pollard-rho (%lu)] ", a);
1679 mpz_inits (t, t2, nullptr);
1680 mpz_init_set_si (y, 2);
1681 mpz_init_set_si (x, 2);
1682 mpz_init_set_si (z, 2);
1683 mpz_init_set_ui (P, 1);
1685 unsigned long long int k = 1;
1686 unsigned long long int l = 1;
1688 while (mpz_cmp_ui (n, 1) != 0)
1690 for (;;)
1694 mpz_mul (t, x, x);
1695 mpz_mod (x, t, n);
1696 mpz_add_ui (x, x, a);
1698 mpz_sub (t, z, x);
1699 mpz_mul (t2, P, t);
1700 mpz_mod (P, t2, n);
1702 if (k % 32 == 1)
1704 mpz_gcd (t, P, n);
1705 if (mpz_cmp_ui (t, 1) != 0)
1706 goto factor_found;
1707 mpz_set (y, x);
1710 while (--k != 0);
1712 mpz_set (z, x);
1713 k = l;
1714 l = 2 * l;
1715 for (unsigned long long int i = 0; i < k; i++)
1717 mpz_mul (t, x, x);
1718 mpz_mod (x, t, n);
1719 mpz_add_ui (x, x, a);
1721 mpz_set (y, x);
1724 factor_found:
1727 mpz_mul (t, y, y);
1728 mpz_mod (y, t, n);
1729 mpz_add_ui (y, y, a);
1731 mpz_sub (t, z, y);
1732 mpz_gcd (t, t, n);
1734 while (mpz_cmp_ui (t, 1) == 0);
1736 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1738 if (!mp_prime_p (t))
1740 devmsg ("[composite factor--restarting pollard-rho] ");
1741 mp_factor_using_pollard_rho (t, a + 1, factors);
1743 else
1745 mp_factor_insert (factors, t);
1748 if (mp_prime_p (n))
1750 mp_factor_insert (factors, n);
1751 break;
1754 mpz_mod (x, x, n);
1755 mpz_mod (z, z, n);
1756 mpz_mod (y, y, n);
1759 mpz_clears (P, t2, t, z, x, y, nullptr);
1762 #if USE_SQUFOF
1763 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1764 algorithm is replaced, consider also returning the remainder. */
1765 ATTRIBUTE_CONST
1766 static uintmax_t
1767 isqrt (uintmax_t n)
1769 uintmax_t x;
1770 int c;
1771 if (n == 0)
1772 return 0;
1774 count_leading_zeros (c, n);
1776 /* Make x > sqrt(n). This will be invariant through the loop. */
1777 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) >> 1);
1779 for (;;)
1781 uintmax_t y = (x + n / x) / 2;
1782 if (y >= x)
1783 return x;
1785 x = y;
1789 ATTRIBUTE_CONST
1790 static uintmax_t
1791 isqrt2 (uintmax_t nh, uintmax_t nl)
1793 int shift;
1794 uintmax_t x;
1796 /* Ensures the remainder fits in an uintmax_t. */
1797 affirm (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1799 if (nh == 0)
1800 return isqrt (nl);
1802 count_leading_zeros (shift, nh);
1803 shift &= ~1;
1805 /* Make x > sqrt (n). */
1806 x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1807 x <<= (W_TYPE_SIZE - shift) >> 1;
1809 /* Do we need more than one iteration? */
1810 for (;;)
1812 MAYBE_UNUSED uintmax_t r;
1813 uintmax_t q, y;
1814 udiv_qrnnd (q, r, nh, nl, x);
1815 y = (x + q) / 2;
1817 if (y >= x)
1819 uintmax_t hi, lo;
1820 umul_ppmm (hi, lo, x + 1, x + 1);
1821 affirm (gt2 (hi, lo, nh, nl));
1823 umul_ppmm (hi, lo, x, x);
1824 affirm (ge2 (nh, nl, hi, lo));
1825 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1826 affirm (hi == 0);
1828 return x;
1831 x = y;
1835 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1836 # define MAGIC64 0x0202021202030213ULL
1837 # define MAGIC63 0x0402483012450293ULL
1838 # define MAGIC65 0x218a019866014613ULL
1839 # define MAGIC11 0x23b
1841 /* Return the square root if the input is a square, otherwise 0. */
1842 ATTRIBUTE_CONST
1843 static uintmax_t
1844 is_square (uintmax_t x)
1846 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1847 computing the square root. */
1848 if (((MAGIC64 >> (x & 63)) & 1)
1849 && ((MAGIC63 >> (x % 63)) & 1)
1850 /* Both 0 and 64 are squares mod (65). */
1851 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1852 && ((MAGIC11 >> (x % 11) & 1)))
1854 uintmax_t r = isqrt (x);
1855 if (r * r == x)
1856 return r;
1858 return 0;
1861 /* invtab[i] = floor (0x10000 / (0x100 + i) */
1862 static short const invtab[0x81] =
1864 0x200,
1865 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1866 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1867 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1868 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1869 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1870 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1871 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1872 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1873 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1874 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1875 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1876 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1877 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1878 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1879 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1880 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1883 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1884 that q < 0x40; here it instead uses a table of (Euclidean) inverses. */
1885 # define div_smallq(q, r, u, d) \
1886 do { \
1887 if ((u) / 0x40 < (d)) \
1889 int _cnt; \
1890 uintmax_t _dinv, _mask, _q, _r; \
1891 count_leading_zeros (_cnt, (d)); \
1892 _r = (u); \
1893 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1895 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1896 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1898 else \
1900 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1901 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1903 _r -= _q * (d); \
1905 _mask = -(uintmax_t) (_r >= (d)); \
1906 (r) = _r - (_mask & (d)); \
1907 (q) = _q - _mask; \
1908 affirm ((q) * (d) + (r) == u); \
1910 else \
1912 uintmax_t _q = (u) / (d); \
1913 (r) = (u) - _q * (d); \
1914 (q) = _q; \
1916 } while (0)
1918 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1919 = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025),
1920 with 3025 = 55^2.
1922 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1923 representing G_0 = (-55, 2 * 4652, 8653).
1925 In the notation of the paper:
1927 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1931 t_0 = floor([q_0 + R_0] / S0) = 1
1932 R_1 = t_0 * S_0 - R_0 = 4001
1933 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1936 /* Multipliers, in order of efficiency:
1937 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1938 0.7317 3*5*7 = 105 = 1
1939 0.7820 3*5*11 = 165 = 1
1940 0.7872 3*5 = 15 = 3
1941 0.8101 3*7*11 = 231 = 3
1942 0.8155 3*7 = 21 = 1
1943 0.8284 5*7*11 = 385 = 1
1944 0.8339 5*7 = 35 = 3
1945 0.8716 3*11 = 33 = 1
1946 0.8774 3 = 3 = 3
1947 0.8913 5*11 = 55 = 3
1948 0.8972 5 = 5 = 1
1949 0.9233 7*11 = 77 = 1
1950 0.9295 7 = 7 = 3
1951 0.9934 11 = 11 = 3
1953 # define QUEUE_SIZE 50
1954 #endif
1956 #if STAT_SQUFOF
1957 # define Q_FREQ_SIZE 50
1958 /* Element 0 keeps the total */
1959 static int q_freq[Q_FREQ_SIZE + 1];
1960 #endif
1962 #if USE_SQUFOF
1963 /* Return true on success. Expected to fail only for numbers
1964 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1965 static bool
1966 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1968 /* Uses algorithm and notation from
1970 SQUARE FORM FACTORIZATION
1971 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1973 https://homes.cerias.purdue.edu/~ssw/squfof.pdf
1976 static short const multipliers_1[] =
1977 { /* = 1 (mod 4) */
1978 105, 165, 21, 385, 33, 5, 77, 1, 0
1980 static short const multipliers_3[] =
1981 { /* = 3 (mod 4) */
1982 1155, 15, 231, 35, 3, 55, 7, 11, 0
1985 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1987 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1988 return false;
1990 uintmax_t sqrt_n = isqrt2 (n1, n0);
1992 if (n0 == sqrt_n * sqrt_n)
1994 uintmax_t p1, p0;
1996 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1997 affirm (p0 == n0);
1999 if (n1 == p1)
2001 if (prime_p (sqrt_n))
2002 factor_insert_multiplicity (factors, sqrt_n, 2);
2003 else
2005 struct factors f;
2007 f.nfactors = 0;
2008 if (!factor_using_squfof (0, sqrt_n, &f))
2010 /* Try pollard rho instead */
2011 factor_using_pollard_rho (sqrt_n, 1, &f);
2013 /* Duplicate the new factors */
2014 for (unsigned int i = 0; i < f.nfactors; i++)
2015 factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]);
2017 return true;
2021 /* Select multipliers so we always get n * mu = 3 (mod 4) */
2022 for (short const *m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2023 *m; m++)
2025 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2026 unsigned int i;
2027 unsigned int mu = *m;
2028 int qpos = 0;
2030 affirm (mu * n0 % 4 == 3);
2032 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2033 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2034 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2035 mu^3 < n.
2037 However, this seems insufficient: With n = 37243139 and mu =
2038 105, we get a trivial factor, from the square 38809 = 197^2,
2039 without any corresponding Q earlier in the iteration.
2041 Requiring 64 mu^3 < n seems sufficient. */
2042 if (n1 == 0)
2044 if ((uintmax_t) mu * mu * mu >= n0 / 64)
2045 continue;
2047 else
2049 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2050 continue;
2052 umul_ppmm (Dh, Dl, n0, mu);
2053 Dh += n1 * mu;
2055 affirm (Dl % 4 != 1);
2056 affirm (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2058 S = isqrt2 (Dh, Dl);
2060 Q1 = 1;
2061 P = S;
2063 /* Square root remainder fits in one word, so ignore high part. */
2064 Q = Dl - P * P;
2065 /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))? */
2066 L = isqrt (2 * S);
2067 B = 2 * L;
2068 L1 = mu * 2 * L;
2070 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2071 4 D. */
2073 for (i = 0; i <= B; i++)
2075 uintmax_t q, P1, t, rem;
2077 div_smallq (q, rem, S + P, Q);
2078 P1 = S - rem; /* P1 = q*Q - P */
2080 affirm (q > 0 && Q > 0);
2082 # if STAT_SQUFOF
2083 q_freq[0]++;
2084 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2085 # endif
2087 if (Q <= L1)
2089 uintmax_t g = Q;
2091 if ((Q & 1) == 0)
2092 g /= 2;
2094 g /= gcd_odd (g, mu);
2096 if (g <= L)
2098 if (qpos >= QUEUE_SIZE)
2099 error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2100 queue[qpos].Q = g;
2101 queue[qpos].P = P % g;
2102 qpos++;
2106 /* I think the difference can be either sign, but mod
2107 2^W_TYPE_SIZE arithmetic should be fine. */
2108 t = Q1 + q * (P - P1);
2109 Q1 = Q;
2110 Q = t;
2111 P = P1;
2113 if ((i & 1) == 0)
2115 uintmax_t r = is_square (Q);
2116 if (r)
2118 for (int j = 0; j < qpos; j++)
2120 if (queue[j].Q == r)
2122 if (r == 1)
2123 /* Traversed entire cycle. */
2124 goto next_multiplier;
2126 /* Need the absolute value for divisibility test. */
2127 if (P >= queue[j].P)
2128 t = P - queue[j].P;
2129 else
2130 t = queue[j].P - P;
2131 if (t % r == 0)
2133 /* Delete entries up to and including entry
2134 j, which matched. */
2135 memmove (queue, queue + j + 1,
2136 (qpos - j - 1) * sizeof (queue[0]));
2137 qpos -= (j + 1);
2139 goto next_i;
2143 /* We have found a square form, which should give a
2144 factor. */
2145 Q1 = r;
2146 affirm (S >= P); /* What signs are possible? */
2147 P += r * ((S - P) / r);
2149 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2150 for the case D = 2N. */
2151 /* Compute Q = (D - P*P) / Q1, but we need double
2152 precision. */
2153 uintmax_t hi, lo;
2154 umul_ppmm (hi, lo, P, P);
2155 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2156 udiv_qrnnd (Q, rem, hi, lo, Q1);
2157 affirm (rem == 0);
2159 for (;;)
2161 /* Note: There appears to by a typo in the paper,
2162 Step 4a in the algorithm description says q <--
2163 floor([S+P]/\hat Q), but looking at the equations
2164 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2165 (In this code, \hat Q is Q1). */
2166 div_smallq (q, rem, S + P, Q);
2167 P1 = S - rem; /* P1 = q*Q - P */
2169 # if STAT_SQUFOF
2170 q_freq[0]++;
2171 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2172 # endif
2173 if (P == P1)
2174 break;
2175 t = Q1 + q * (P - P1);
2176 Q1 = Q;
2177 Q = t;
2178 P = P1;
2181 if ((Q & 1) == 0)
2182 Q /= 2;
2183 Q /= gcd_odd (Q, mu);
2185 affirm (Q > 1 && (n1 || Q < n0));
2187 if (prime_p (Q))
2188 factor_insert (factors, Q);
2189 else if (!factor_using_squfof (0, Q, factors))
2190 factor_using_pollard_rho (Q, 2, factors);
2192 divexact_21 (n1, n0, n1, n0, Q);
2194 if (prime2_p (n1, n0))
2195 factor_insert_large (factors, n1, n0);
2196 else
2198 if (!factor_using_squfof (n1, n0, factors))
2200 if (n1 == 0)
2201 factor_using_pollard_rho (n0, 1, factors);
2202 else
2203 factor_using_pollard_rho2 (n1, n0, 1, factors);
2207 return true;
2210 next_i:;
2212 next_multiplier:;
2214 return false;
2216 #endif
2218 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2219 results in FACTORS. */
2220 static void
2221 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2223 factors->nfactors = 0;
2224 factors->plarge[1] = 0;
2226 if (t1 == 0 && t0 < 2)
2227 return;
2229 t0 = factor_using_division (&t1, t1, t0, factors);
2231 if (t1 == 0 && t0 < 2)
2232 return;
2234 if (prime2_p (t1, t0))
2235 factor_insert_large (factors, t1, t0);
2236 else
2238 #if USE_SQUFOF
2239 if (factor_using_squfof (t1, t0, factors))
2240 return;
2241 #endif
2243 if (t1 == 0)
2244 factor_using_pollard_rho (t0, 1, factors);
2245 else
2246 factor_using_pollard_rho2 (t1, t0, 1, factors);
2250 /* Use Pollard-rho to compute the prime factors of
2251 arbitrary-precision T, and put the results in FACTORS. */
2252 static void
2253 mp_factor (mpz_t t, struct mp_factors *factors)
2255 mp_factor_init (factors);
2257 if (mpz_sgn (t) != 0)
2259 mp_factor_using_division (t, factors);
2261 if (mpz_cmp_ui (t, 1) != 0)
2263 devmsg ("[is number prime?] ");
2264 if (mp_prime_p (t))
2265 mp_factor_insert (factors, t);
2266 else
2267 mp_factor_using_pollard_rho (t, 1, factors);
2272 static strtol_error
2273 strto2uintmax (uintmax_t *hip, uintmax_t *lop, char const *s)
2275 int lo_carry;
2276 uintmax_t hi = 0, lo = 0;
2278 strtol_error err = LONGINT_INVALID;
2280 /* Initial scan for invalid digits. */
2281 char const *p = s;
2282 for (;;)
2284 unsigned char c = *p++;
2285 if (c == 0)
2286 break;
2288 if (UNLIKELY (!ISDIGIT (c)))
2290 err = LONGINT_INVALID;
2291 break;
2294 err = LONGINT_OK; /* we've seen at least one valid digit */
2297 while (err == LONGINT_OK)
2299 unsigned char c = *s++;
2300 if (c == 0)
2301 break;
2303 c -= '0';
2305 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2307 err = LONGINT_OVERFLOW;
2308 break;
2310 hi = 10 * hi;
2312 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2313 lo_carry += 10 * lo < 2 * lo;
2315 lo = 10 * lo;
2316 lo += c;
2318 lo_carry += lo < c;
2319 hi += lo_carry;
2320 if (UNLIKELY (hi < lo_carry))
2322 err = LONGINT_OVERFLOW;
2323 break;
2327 *hip = hi;
2328 *lop = lo;
2330 return err;
2333 /* Structure and routines for buffering and outputting full lines,
2334 to support parallel operation efficiently. */
2335 static struct lbuf_
2337 char *buf;
2338 char *end;
2339 } lbuf;
2341 /* 512 is chosen to give good performance,
2342 and also is the max guaranteed size that
2343 consumers can read atomically through pipes.
2344 Also it's big enough to cater for max line length
2345 even with 128 bit uintmax_t. */
2346 #define FACTOR_PIPE_BUF 512
2348 static void
2349 lbuf_alloc (void)
2351 if (lbuf.buf)
2352 return;
2354 /* Double to ensure enough space for
2355 previous numbers + next number. */
2356 lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2);
2357 lbuf.end = lbuf.buf;
2360 /* Write complete LBUF to standard output. */
2361 static void
2362 lbuf_flush (void)
2364 size_t size = lbuf.end - lbuf.buf;
2365 if (full_write (STDOUT_FILENO, lbuf.buf, size) != size)
2366 write_error ();
2367 lbuf.end = lbuf.buf;
2370 /* Add a character C to LBUF and if it's a newline
2371 and enough bytes are already buffered,
2372 then write atomically to standard output. */
2373 static void
2374 lbuf_putc (char c)
2376 *lbuf.end++ = c;
2378 if (c == '\n')
2380 size_t buffered = lbuf.end - lbuf.buf;
2382 /* Provide immediate output for interactive use. */
2383 static int line_buffered = -1;
2384 if (line_buffered == -1)
2385 line_buffered = isatty (STDIN_FILENO) || isatty (STDOUT_FILENO);
2386 if (line_buffered)
2387 lbuf_flush ();
2388 else if (buffered >= FACTOR_PIPE_BUF)
2390 /* Write output in <= PIPE_BUF chunks
2391 so consumers can read atomically. */
2392 char const *tend = lbuf.end;
2394 /* Since a umaxint_t's factors must fit in 512
2395 we're guaranteed to find a newline here. */
2396 char *tlend = lbuf.buf + FACTOR_PIPE_BUF;
2397 while (*--tlend != '\n');
2398 tlend++;
2400 lbuf.end = tlend;
2401 lbuf_flush ();
2403 /* Buffer the remainder. */
2404 memcpy (lbuf.buf, tlend, tend - tlend);
2405 lbuf.end = lbuf.buf + (tend - tlend);
2410 /* Buffer an int to the internal LBUF. */
2411 static void
2412 lbuf_putint (uintmax_t i, size_t min_width)
2414 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2415 char const *umaxstr = umaxtostr (i, buf);
2416 size_t width = sizeof (buf) - (umaxstr - buf) - 1;
2417 size_t z = width;
2419 for (; z < min_width; z++)
2420 *lbuf.end++ = '0';
2422 memcpy (lbuf.end, umaxstr, width);
2423 lbuf.end += width;
2426 static void
2427 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2429 uintmax_t q, r;
2431 if (t1 == 0)
2432 lbuf_putint (t0, 0);
2433 else
2435 /* Use very plain code here since it seems hard to write fast code
2436 without assuming a specific word size. */
2437 q = t1 / 1000000000;
2438 r = t1 % 1000000000;
2439 udiv_qrnnd (t0, r, r, t0, 1000000000);
2440 print_uintmaxes (q, t0);
2441 lbuf_putint (r, 9);
2445 /* Single-precision factoring */
2446 static void
2447 print_factors_single (uintmax_t t1, uintmax_t t0)
2449 struct factors factors;
2451 print_uintmaxes (t1, t0);
2452 lbuf_putc (':');
2454 factor (t1, t0, &factors);
2456 for (int j = 0; j < factors.nfactors; j++)
2457 for (int k = 0; k < factors.e[j]; k++)
2459 lbuf_putc (' ');
2460 print_uintmaxes (0, factors.p[j]);
2461 if (print_exponents && factors.e[j] > 1)
2463 lbuf_putc ('^');
2464 lbuf_putint (factors.e[j], 0);
2465 break;
2469 if (factors.plarge[1])
2471 lbuf_putc (' ');
2472 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2475 lbuf_putc ('\n');
2478 /* Emit the factors of the indicated number. If we have the option of using
2479 either algorithm, we select on the basis of the length of the number.
2480 For longer numbers, we prefer the MP algorithm even if the native algorithm
2481 has enough digits, because the algorithm is better. The turnover point
2482 depends on the value. */
2483 static bool
2484 print_factors (char const *input)
2486 /* Skip initial spaces and '+'. */
2487 char const *str = input;
2488 while (*str == ' ')
2489 str++;
2490 str += *str == '+';
2492 uintmax_t t1, t0;
2494 /* Try converting the number to one or two words. If it fails, use GMP or
2495 print an error message. The 2nd condition checks that the most
2496 significant bit of the two-word number is clear, in a typesize neutral
2497 way. */
2498 strtol_error err = strto2uintmax (&t1, &t0, str);
2500 switch (err)
2502 case LONGINT_OK:
2503 if (((t1 << 1) >> 1) == t1)
2505 devmsg ("[using single-precision arithmetic] ");
2506 print_factors_single (t1, t0);
2507 return true;
2509 break;
2511 case LONGINT_OVERFLOW:
2512 /* Try GMP. */
2513 break;
2515 default:
2516 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2517 return false;
2520 devmsg ("[using arbitrary-precision arithmetic] ");
2521 mpz_t t;
2522 struct mp_factors factors;
2524 mpz_init_set_str (t, str, 10);
2526 mpz_out_str (stdout, 10, t);
2527 putchar (':');
2528 mp_factor (t, &factors);
2530 for (idx_t j = 0; j < factors.nfactors; j++)
2531 for (unsigned long int k = 0; k < factors.e[j]; k++)
2533 putchar (' ');
2534 mpz_out_str (stdout, 10, factors.p[j]);
2535 if (print_exponents && factors.e[j] > 1)
2537 printf ("^%lu", factors.e[j]);
2538 break;
2542 mp_factor_clear (&factors);
2543 mpz_clear (t);
2544 putchar ('\n');
2545 fflush (stdout);
2546 return true;
2549 void
2550 usage (int status)
2552 if (status != EXIT_SUCCESS)
2553 emit_try_help ();
2554 else
2556 printf (_("\
2557 Usage: %s [OPTION] [NUMBER]...\n\
2559 program_name);
2560 fputs (_("\
2561 Print the prime factors of each specified integer NUMBER. If none\n\
2562 are specified on the command line, read them from standard input.\n\
2564 "), stdout);
2565 fputs ("\
2566 -h, --exponents print repeated factors in form p^e unless e is 1\n\
2567 ", stdout);
2568 fputs (HELP_OPTION_DESCRIPTION, stdout);
2569 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2570 emit_ancillary_info (PROGRAM_NAME);
2572 exit (status);
2575 static bool
2576 do_stdin (void)
2578 bool ok = true;
2579 token_buffer tokenbuffer;
2581 init_tokenbuffer (&tokenbuffer);
2583 while (true)
2585 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2586 &tokenbuffer);
2587 if (token_length == (size_t) -1)
2589 if (ferror (stdin))
2590 error (EXIT_FAILURE, errno, _("error reading input"));
2591 break;
2594 ok &= print_factors (tokenbuffer.buffer);
2596 free (tokenbuffer.buffer);
2598 return ok;
2602 main (int argc, char **argv)
2604 initialize_main (&argc, &argv);
2605 set_program_name (argv[0]);
2606 setlocale (LC_ALL, "");
2607 bindtextdomain (PACKAGE, LOCALEDIR);
2608 textdomain (PACKAGE);
2610 lbuf_alloc ();
2611 atexit (close_stdout);
2612 atexit (lbuf_flush);
2614 int c;
2615 while ((c = getopt_long (argc, argv, "h", long_options, nullptr)) != -1)
2617 switch (c)
2619 case 'h': /* NetBSD used -h for this functionality first. */
2620 print_exponents = true;
2621 break;
2623 case DEV_DEBUG_OPTION:
2624 dev_debug = true;
2625 break;
2627 case_GETOPT_HELP_CHAR;
2629 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2631 default:
2632 usage (EXIT_FAILURE);
2636 #if STAT_SQUFOF
2637 memset (q_freq, 0, sizeof (q_freq));
2638 #endif
2640 bool ok;
2641 if (argc <= optind)
2642 ok = do_stdin ();
2643 else
2645 ok = true;
2646 for (int i = optind; i < argc; i++)
2647 if (! print_factors (argv[i]))
2648 ok = false;
2651 #if STAT_SQUFOF
2652 if (q_freq[0] > 0)
2654 double acc_f;
2655 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2656 for (int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2658 double f = (double) q_freq[i] / q_freq[0];
2659 acc_f += f;
2660 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2661 100.0 * f, 100.0 * acc_f);
2664 #endif
2666 return ok ? EXIT_SUCCESS : EXIT_FAILURE;