tests: add fold(1) test for --bytes option
[coreutils.git] / src / factor.c
blobcba32d8743800d3154a42940fe67c19d64573856
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organization:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Algorithm:
40 (1) Perform trial division using a small primes table, but without hardware
41 division since the primes table store inverses modulo the word base.
42 (The GMP variant of this code doesn't make use of the precomputed
43 inverses, but instead relies on GMP for fast divisibility testing.)
44 (2) Check the nature of any non-factored part using Miller-Rabin for
45 detecting composites, and Lucas for detecting primes.
46 (3) Factor any remaining composite part using the Pollard-Brent rho
47 algorithm or if USE_SQUFOF is defined to 1, try that first.
48 Status of found factors are checked again using Miller-Rabin and Lucas.
50 We prefer using Hensel norm in the divisions, not the more familiar
51 Euclidean norm, since the former leads to much faster code. In the
52 Pollard-Brent rho code and the prime testing code, we use Montgomery's
53 trick of multiplying all n-residues by the word base, allowing cheap Hensel
54 reductions mod n.
56 The GMP code uses an algorithm that can be considerably slower;
57 for example, on a circa-2017 Intel Xeon Silver 4116, factoring
58 2^{127}-3 takes about 50 ms with the two-word algorithm but would
59 take about 750 ms with the GMP code.
61 Improvements:
63 * Use modular inverses also for exact division in the Lucas code, and
64 elsewhere. A problem is to locate the inverses not from an index, but
65 from a prime. We might instead compute the inverse on-the-fly.
67 * Tune trial division table size (not forgetting that this is a standalone
68 program where the table will be read from secondary storage for
69 each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidean)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methods. These functions currently dominate run time when using
84 the -w option.
87 /* Whether to recursively factor to prove primality,
88 or run faster probabilistic tests. */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
93 /* Faster for certain ranges but less general. */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
98 /* Output SQUFOF statistics. */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdbit.h>
107 #include <stdio.h>
108 #include <gmp.h>
110 #include "system.h"
111 #include "assure.h"
112 #include "full-write.h"
113 #include "quote.h"
114 #include "readtokens.h"
115 #include "xstrtol.h"
117 /* The official name of this program (e.g., no 'g' prefix). */
118 #define PROGRAM_NAME "factor"
120 #define AUTHORS \
121 proper_name ("Paul Rubin"), \
122 proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
123 proper_name_lite ("Niels Moller", "Niels M\303\266ller")
125 /* Token delimiters when reading from a file. */
126 #define DELIM "\n\t "
128 #ifndef USE_LONGLONG_H
129 /* With the way we use longlong.h, it's only safe to use
130 when UWtype = UHWtype, as there were various cases
131 (as can be seen in the history for longlong.h) where
132 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
133 to avoid compile time or run time issues. */
134 # if LONG_MAX == INTMAX_MAX
135 # define USE_LONGLONG_H 1
136 # endif
137 #endif
139 #define W_TYPE_SIZE UINTMAX_WIDTH
141 #if USE_LONGLONG_H
143 /* Make definitions for longlong.h to make it do what it can do for us */
145 # define UWtype uintmax_t
146 # define UHWtype unsigned long int
147 # undef UDWtype
148 # if HAVE_ATTRIBUTE_MODE
149 typedef unsigned int UQItype __attribute__ ((mode (QI)));
150 typedef int SItype __attribute__ ((mode (SI)));
151 typedef unsigned int USItype __attribute__ ((mode (SI)));
152 typedef int DItype __attribute__ ((mode (DI)));
153 typedef unsigned int UDItype __attribute__ ((mode (DI)));
154 # else
155 typedef unsigned char UQItype;
156 typedef long SItype;
157 typedef unsigned long int USItype;
158 # if HAVE_LONG_LONG_INT
159 typedef long long int DItype;
160 typedef unsigned long long int UDItype;
161 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
162 typedef long int DItype;
163 typedef unsigned long int UDItype;
164 # endif
165 # endif
166 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
167 # define ASSERT(x) /* FIXME make longlong.h really standalone */
168 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
169 # ifndef __GMP_GNUC_PREREQ
170 # define __GMP_GNUC_PREREQ(a,b) 1
171 # endif
173 /* longlong.h uses these macros only in certain system compiler combinations.
174 Ensure usage to pacify -Wunused-macros. */
175 #if defined ASSERT || defined UHWtype || defined __GMP_DECLSPEC
176 #endif
178 # if _ARCH_PPC
179 # define HAVE_HOST_CPU_FAMILY_powerpc 1
180 # endif
181 # include "longlong.h"
183 #else /* not USE_LONGLONG_H */
185 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
186 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
187 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
189 #endif
191 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
192 #define MAX_NFACTS 26
194 enum
196 DEV_DEBUG_OPTION = CHAR_MAX + 1
199 static struct option const long_options[] =
201 {"exponents", no_argument, nullptr, 'h'},
202 {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION},
203 {GETOPT_HELP_OPTION_DECL},
204 {GETOPT_VERSION_OPTION_DECL},
205 {nullptr, 0, nullptr, 0}
208 /* If true, use p^e output format. */
209 static bool print_exponents;
211 /* This represents an unsigned integer twice as wide as uintmax_t. */
212 typedef struct { uintmax_t uu[2]; } uuint;
214 /* Accessors and constructors for the type. Pprograms should not
215 access the type's internals directly, in case some future version
216 replaces the type with unsigned __int128 or whatever. */
217 static uintmax_t lo (uuint u) { return u.uu[0]; }
218 static uintmax_t hi (uuint u) { return u.uu[1]; }
219 static void hiset (uuint *u, uintmax_t hi) { u->uu[1] = hi; }
220 static void
221 uuset (uintmax_t *phi, uintmax_t *plo, uuint uu)
223 *phi = hi (uu);
224 *plo = lo (uu);
226 static uuint
227 make_uuint (uintmax_t hi, uintmax_t lo)
229 return (uuint) {{lo, hi}};
232 /* BIG_POWER_OF_10 is a positive power of 10 that does not exceed UINTMAX_MAX.
233 The larger it is, the more efficient the code will likely be.
234 LOG_BIG_POWER_OF_10 = log (BIG_POWER_OF_10). */
235 #if UINTMAX_WIDTH < 64
236 # error "platform does not support 64-bit integers"
237 #elif UINTMAX_WIDTH < 128 || !defined UINTMAX_C
238 /* Mainstream platforms as of 2024, with at-least-64-bit uintmax_t. */
239 static uintmax_t const BIG_POWER_OF_10 = 10000000000000000000llu;
240 enum { LOG_BIG_POWER_OF_10 = 19 };
241 #else
242 /* For so-far-only-theoretical platforms with at-least-128-bit uintmax_t.
243 This is for performance; the 64-bit mainstream code will still work. */
244 static uintmax_t const BIG_POWER_OF_10 =
245 UINTMAX_C (100000000000000000000000000000000000000);
246 enum { LOG_BIG_POWER_OF_10 = 38 };
247 #endif
249 struct factors
251 uuint plarge; /* Can have a single large factor */
252 uintmax_t p[MAX_NFACTS];
253 unsigned char e[MAX_NFACTS];
254 unsigned char nfactors;
257 struct mp_factors
259 mpz_t *p;
260 unsigned long int *e;
261 idx_t nfactors;
262 idx_t nalloc;
265 static void factor (uintmax_t, uintmax_t, struct factors *);
267 #ifndef umul_ppmm
268 # define umul_ppmm(w1, w0, u, v) \
269 do { \
270 uintmax_t __x0, __x1, __x2, __x3; \
271 unsigned long int __ul, __vl, __uh, __vh; \
272 uintmax_t __u = (u), __v = (v); \
274 __ul = __ll_lowpart (__u); \
275 __uh = __ll_highpart (__u); \
276 __vl = __ll_lowpart (__v); \
277 __vh = __ll_highpart (__v); \
279 __x0 = (uintmax_t) __ul * __vl; \
280 __x1 = (uintmax_t) __ul * __vh; \
281 __x2 = (uintmax_t) __uh * __vl; \
282 __x3 = (uintmax_t) __uh * __vh; \
284 __x1 += __ll_highpart (__x0);/* This can't give carry. */ \
285 __x1 += __x2; /* But this indeed can. */ \
286 if (__x1 < __x2) /* Did we get it? */ \
287 __x3 += __ll_B; /* Yes, add it in the proper pos. */ \
289 (w1) = __x3 + __ll_highpart (__x1); \
290 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
291 } while (0)
292 #endif
294 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
295 /* Define our own, not needing normalization. This function is
296 currently not performance critical, so keep it simple. Similar to
297 the mod macro below. */
298 # undef udiv_qrnnd
299 # define udiv_qrnnd(q, r, n1, n0, d) \
300 do { \
301 uintmax_t __d1, __d0, __q, __r1, __r0; \
303 __d1 = (d); __d0 = 0; \
304 __r1 = (n1); __r0 = (n0); \
305 affirm (__r1 < __d1); \
306 __q = 0; \
307 for (int __i = W_TYPE_SIZE; __i > 0; __i--) \
309 rsh2 (__d1, __d0, __d1, __d0, 1); \
310 __q <<= 1; \
311 if (ge2 (__r1, __r0, __d1, __d0)) \
313 __q++; \
314 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
317 (r) = __r0; \
318 (q) = __q; \
319 } while (0)
320 #endif
322 #if !defined add_ssaaaa
323 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
324 do { \
325 uintmax_t _add_x; \
326 _add_x = (al) + (bl); \
327 (sh) = (ah) + (bh) + (_add_x < (al)); \
328 (sl) = _add_x; \
329 } while (0)
330 #endif
332 /* Set (rh,rl) = (ah,al) >> cnt, where 0 < cnt < W_TYPE_SIZE. */
333 #define rsh2(rh, rl, ah, al, cnt) \
334 do { \
335 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
336 (rh) = (ah) >> (cnt); \
337 } while (0)
339 /* Set (rh,rl) = (ah,al) << cnt, where 0 < cnt < W_TYPE_SIZE. */
340 #define lsh2(rh, rl, ah, al, cnt) \
341 do { \
342 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
343 (rl) = (al) << (cnt); \
344 } while (0)
346 #define ge2(ah, al, bh, bl) \
347 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
349 #define gt2(ah, al, bh, bl) \
350 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
352 #ifndef sub_ddmmss
353 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
354 do { \
355 uintmax_t _cy; \
356 _cy = (al) < (bl); \
357 (rl) = (al) - (bl); \
358 (rh) = (ah) - (bh) - _cy; \
359 } while (0)
360 #endif
362 /* Requires that a < n and b <= n */
363 #define submod(r,a,b,n) \
364 do { \
365 uintmax_t _t = - (uintmax_t) (a < b); \
366 (r) = ((n) & _t) + (a) - (b); \
367 } while (0)
369 #define addmod(r,a,b,n) \
370 submod ((r), (a), ((n) - (b)), (n))
372 /* Modular two-word addition and subtraction. For performance reasons, the
373 most significant bit of n1 must be clear. The destination variables must be
374 distinct from the mod operand. */
375 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
376 do { \
377 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
378 if (ge2 ((r1), (r0), (n1), (n0))) \
379 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
380 } while (0)
381 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
382 do { \
383 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
384 if ((intmax_t) (r1) < 0) \
385 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
386 } while (0)
388 #define HIGHBIT_TO_MASK(x) \
389 (((intmax_t)-1 >> 1) < 0 \
390 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
391 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
392 ? UINTMAX_MAX : (uintmax_t) 0))
394 /* Return r = a mod d, where a = <a1,a0>, d = <d1,d0>.
395 Requires that d1 != 0. */
396 ATTRIBUTE_PURE static uuint
397 mod2 (uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
399 affirm (d1 != 0);
401 if (a1)
403 int cntd = stdc_leading_zeros (d1);
404 int cnta = stdc_leading_zeros (a1);
405 int cnt = cntd - cnta;
406 if (0 < cnt)
408 lsh2 (d1, d0, d1, d0, cnt);
409 for (int i = 0; i < cnt; i++)
411 if (ge2 (a1, a0, d1, d0))
412 sub_ddmmss (a1, a0, a1, a0, d1, d0);
413 rsh2 (d1, d0, d1, d0, 1);
418 return make_uuint (a1, a0);
421 ATTRIBUTE_CONST
422 static uintmax_t
423 gcd_odd (uintmax_t a, uintmax_t b)
425 if ((b & 1) == 0)
427 uintmax_t t = b;
428 b = a;
429 a = t;
431 if (a == 0)
432 return b;
434 /* Take out least significant one bit, to make room for sign */
435 b >>= 1;
437 for (;;)
439 uintmax_t t;
440 uintmax_t bgta;
442 assume (a);
443 a >>= stdc_trailing_zeros (a);
444 a >>= 1;
446 t = a - b;
447 if (t == 0)
448 return (a << 1) + 1;
450 bgta = HIGHBIT_TO_MASK (t);
452 /* b <-- min (a, b) */
453 b += (bgta & t);
455 /* a <-- |a - b| */
456 a = (t ^ bgta) - bgta;
460 ATTRIBUTE_PURE static uuint
461 gcd2_odd (uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
463 affirm (b0 & 1);
465 if ((a0 | a1) == 0)
466 return make_uuint (b1, b0);
467 if (!a0)
468 a0 = a1, a1 = 0;
469 assume (a0);
470 int ctz = stdc_trailing_zeros (a0);
471 if (ctz)
472 rsh2 (a1, a0, a1, a0, ctz);
474 for (;;)
476 if ((b1 | a1) == 0)
477 return make_uuint (0, gcd_odd (b0, a0));
479 if (gt2 (a1, a0, b1, b0))
481 sub_ddmmss (a1, a0, a1, a0, b1, b0);
482 if (!a0)
483 a0 = a1, a1 = 0;
484 assume (a0);
485 ctz = stdc_trailing_zeros (a0);
486 if (ctz)
487 rsh2 (a1, a0, a1, a0, ctz);
489 else if (gt2 (b1, b0, a1, a0))
491 sub_ddmmss (b1, b0, b1, b0, a1, a0);
492 if (!b0)
493 b0 = b1, b1 = 0;
494 assume (b0);
495 ctz = stdc_trailing_zeros (b0);
496 if (ctz)
497 rsh2 (b1, b0, b1, b0, ctz);
499 else
500 break;
503 return make_uuint (a1, a0);
506 static void
507 factor_insert_multiplicity (struct factors *factors,
508 uintmax_t prime, int m)
510 int nfactors = factors->nfactors;
511 uintmax_t *p = factors->p;
512 unsigned char *e = factors->e;
514 /* Locate position for insert new or increment e. */
515 int i;
516 for (i = nfactors - 1; i >= 0; i--)
518 if (p[i] <= prime)
519 break;
522 if (i < 0 || p[i] != prime)
524 for (int j = nfactors - 1; j > i; j--)
526 p[j + 1] = p[j];
527 e[j + 1] = e[j];
529 p[i + 1] = prime;
530 e[i + 1] = m;
531 factors->nfactors = nfactors + 1;
533 else
535 e[i] += m;
539 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
541 static void
542 factor_insert_large (struct factors *factors,
543 uintmax_t p1, uintmax_t p0)
545 if (p1 > 0)
547 affirm (hi (factors->plarge) == 0);
548 factors->plarge = make_uuint (p1, p0);
550 else
551 factor_insert (factors, p0);
554 #ifndef mpz_inits
556 # include <stdarg.h>
558 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
559 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
561 static void
562 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
564 va_list ap;
566 va_start (ap, mpz_single_init);
568 mpz_t *mpz;
569 while ((mpz = va_arg (ap, mpz_t *)))
570 mpz_single_init (*mpz);
572 va_end (ap);
574 #endif
576 static void mp_factor (mpz_t, struct mp_factors *);
578 static void
579 mp_factor_init (struct mp_factors *factors)
581 factors->p = nullptr;
582 factors->e = nullptr;
583 factors->nfactors = 0;
584 factors->nalloc = 0;
587 static void
588 mp_factor_clear (struct mp_factors *factors)
590 for (idx_t i = 0; i < factors->nfactors; i++)
591 mpz_clear (factors->p[i]);
593 free (factors->p);
594 free (factors->e);
597 static void
598 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
600 idx_t nfactors = factors->nfactors;
601 mpz_t *p = factors->p;
602 unsigned long int *e = factors->e;
603 ptrdiff_t i;
605 /* Locate position for insert new or increment e. */
606 for (i = nfactors - 1; i >= 0; i--)
608 if (mpz_cmp (p[i], prime) <= 0)
609 break;
612 if (i < 0 || mpz_cmp (p[i], prime) != 0)
614 if (factors->nfactors == factors->nalloc)
616 p = xpalloc (p, &factors->nalloc, 1, -1, sizeof *p);
617 e = xireallocarray (e, factors->nalloc, sizeof *e);
620 mpz_init (p[nfactors]);
621 for (ptrdiff_t j = nfactors - 1; j > i; j--)
623 mpz_set (p[j + 1], p[j]);
624 e[j + 1] = e[j];
626 mpz_set (p[i + 1], prime);
627 e[i + 1] = 1;
629 factors->p = p;
630 factors->e = e;
631 factors->nfactors = nfactors + 1;
633 else
635 e[i] += 1;
639 static void
640 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
642 mpz_t pz;
644 mpz_init_set_ui (pz, prime);
645 mp_factor_insert (factors, pz);
646 mpz_clear (pz);
650 #define P(a,b,c,d) a,
651 static const unsigned char primes_diff[] = {
652 #include "primes.h"
653 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
655 #undef P
657 #define PRIMES_PTAB_ENTRIES (ARRAY_CARDINALITY (primes_diff) - 8 + 1)
659 #define P(a,b,c,d) b,
660 static const unsigned char primes_diff8[] = {
661 #include "primes.h"
662 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
664 #undef P
666 struct primes_dtab
668 uintmax_t binv, lim;
671 #define P(a,b,c,d) {c,d},
672 static const struct primes_dtab primes_dtab[] = {
673 #include "primes.h"
674 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
676 #undef P
678 /* Verify that uintmax_t is not wider than
679 the integers used to generate primes.h. */
680 static_assert (UINTMAX_WIDTH <= WIDE_UINT_BITS);
682 /* debugging for developers. Enables devmsg().
683 This flag is used only in the GMP code. */
684 static bool dev_debug = false;
686 /* Prove primality or run probabilistic tests. */
687 static bool flag_prove_primality = PROVE_PRIMALITY;
689 /* Number of Miller-Rabin tests to run when not proving primality. */
690 #define MR_REPS 25
692 static void
693 factor_insert_refind (struct factors *factors, uintmax_t p, int i, int off)
695 for (int j = 0; j < off; j++)
696 p += primes_diff[i + j];
697 factor_insert (factors, p);
700 /* Trial division with odd primes uses the following trick.
702 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
703 consider the case t < B (this is the second loop below).
705 From our tables we get
707 binv = p^{-1} (mod B)
708 lim = floor ((B-1) / p).
710 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
711 (and all quotients in this range occur for some t).
713 Then t = q * p is true also (mod B), and p is invertible we get
715 q = t * binv (mod B).
717 Next, assume that t is *not* divisible by p. Since multiplication
718 by binv (mod B) is a one-to-one mapping,
720 t * binv (mod B) > lim,
722 because all the smaller values are already taken.
724 This can be summed up by saying that the function
726 q(t) = binv * t (mod B)
728 is a permutation of the range 0 <= t < B, with the curious property
729 that it maps the multiples of p onto the range 0 <= q <= lim, in
730 order, and the non-multiples of p onto the range lim < q < B.
733 static uuint
734 factor_using_division (uintmax_t t1, uintmax_t t0,
735 struct factors *factors)
737 if (t0 % 2 == 0)
739 int cnt;
741 if (t0 == 0)
743 assume (t1);
744 cnt = stdc_trailing_zeros (t1);
745 t0 = t1 >> cnt;
746 t1 = 0;
747 cnt += W_TYPE_SIZE;
749 else
751 cnt = stdc_trailing_zeros (t0);
752 rsh2 (t1, t0, t1, t0, cnt);
755 factor_insert_multiplicity (factors, 2, cnt);
758 uintmax_t p = 3;
759 idx_t i;
760 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
762 for (;;)
764 uintmax_t q1, q0, hi;
765 MAYBE_UNUSED uintmax_t lo;
767 q0 = t0 * primes_dtab[i].binv;
768 umul_ppmm (hi, lo, q0, p);
769 if (hi > t1)
770 break;
771 hi = t1 - hi;
772 q1 = hi * primes_dtab[i].binv;
773 if (LIKELY (q1 > primes_dtab[i].lim))
774 break;
775 t1 = q1; t0 = q0;
776 factor_insert (factors, p);
778 p += primes_diff[i + 1];
781 #define DIVBLOCK(I) \
782 do { \
783 for (;;) \
785 q = t0 * pd[I].binv; \
786 if (LIKELY (q > pd[I].lim)) \
787 break; \
788 t0 = q; \
789 factor_insert_refind (factors, p, i + 1, I); \
791 } while (0)
793 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
795 uintmax_t q;
796 const struct primes_dtab *pd = &primes_dtab[i];
797 DIVBLOCK (0);
798 DIVBLOCK (1);
799 DIVBLOCK (2);
800 DIVBLOCK (3);
801 DIVBLOCK (4);
802 DIVBLOCK (5);
803 DIVBLOCK (6);
804 DIVBLOCK (7);
806 p += primes_diff8[i];
807 if (p * p > t0)
808 break;
811 return make_uuint (t1, t0);
814 static void
815 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
817 mpz_t q;
818 mp_bitcnt_t p;
820 devmsg ("[trial division] ");
822 mpz_init (q);
824 p = mpz_scan1 (t, 0);
825 mpz_fdiv_q_2exp (t, t, p);
826 while (p)
828 mp_factor_insert_ui (factors, 2);
829 --p;
832 unsigned long int d = 3;
833 for (idx_t i = 1; i <= PRIMES_PTAB_ENTRIES;)
835 if (! mpz_divisible_ui_p (t, d))
837 d += primes_diff[i++];
838 if (mpz_cmp_ui (t, d * d) < 0)
839 break;
841 else
843 mpz_tdiv_q_ui (t, t, d);
844 mp_factor_insert_ui (factors, d);
848 mpz_clear (q);
851 /* Entry i contains (2i+1)^(-1) mod 2^8. */
852 static const unsigned char binvert_table[128] =
854 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
855 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
856 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
857 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
858 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
859 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
860 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
861 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
862 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
863 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
864 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
865 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
866 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
867 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
868 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
869 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
872 /* Compute n^(-1) mod B, using a Newton iteration. */
873 #define binv(inv,n) \
874 do { \
875 uintmax_t __n = (n); \
876 uintmax_t __inv; \
878 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
879 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
880 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
881 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
883 if (W_TYPE_SIZE > 64) \
885 int __invbits = 64; \
886 do { \
887 __inv = 2 * __inv - __inv * __inv * __n; \
888 __invbits *= 2; \
889 } while (__invbits < W_TYPE_SIZE); \
892 (inv) = __inv; \
893 } while (0)
895 /* q = u / d, assuming d|u. */
896 #define divexact_21(q1, q0, u1, u0, d) \
897 do { \
898 uintmax_t _di, _q0; \
899 binv (_di, (d)); \
900 _q0 = (u0) * _di; \
901 if ((u1) >= (d)) \
903 uintmax_t _p1; \
904 MAYBE_UNUSED intmax_t _p0; \
905 umul_ppmm (_p1, _p0, _q0, d); \
906 (q1) = ((u1) - _p1) * _di; \
907 (q0) = _q0; \
909 else \
911 (q0) = _q0; \
912 (q1) = 0; \
914 } while (0)
916 /* x B (mod n). */
917 #define redcify(r_prim, r, n) \
918 do { \
919 MAYBE_UNUSED uintmax_t _redcify_q; \
920 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
921 } while (0)
923 /* x B^2 (mod n). Requires x > 0, n1 < B/2. */
924 #define redcify2(r1, r0, x, n1, n0) \
925 do { \
926 uintmax_t _r1, _r0, _i; \
927 if ((x) < (n1)) \
929 _r1 = (x); _r0 = 0; \
930 _i = W_TYPE_SIZE; \
932 else \
934 _r1 = 0; _r0 = (x); \
935 _i = 2 * W_TYPE_SIZE; \
937 while (_i-- > 0) \
939 lsh2 (_r1, _r0, _r1, _r0, 1); \
940 if (ge2 (_r1, _r0, (n1), (n0))) \
941 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
943 (r1) = _r1; \
944 (r0) = _r0; \
945 } while (0)
947 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
948 Both a and b must be in redc form, the result will be in redc form too. */
949 static inline uintmax_t
950 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
952 uintmax_t rh, rl, q, th, xh;
953 MAYBE_UNUSED uintmax_t tl;
955 umul_ppmm (rh, rl, a, b);
956 q = rl * mi;
957 umul_ppmm (th, tl, q, m);
958 xh = rh - th;
959 if (rh < th)
960 xh += m;
962 return xh;
965 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
966 Both a and b must be in redc form, the result will be in redc form too.
967 For performance reasons, the most significant bit of m must be clear. */
968 static uintmax_t
969 mulredc2 (uintmax_t *r1p,
970 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
971 uintmax_t m1, uintmax_t m0, uintmax_t mi)
973 uintmax_t r1, r0, q, p1, t1, t0, s1, s0;
974 MAYBE_UNUSED uintmax_t p0;
975 mi = -mi;
976 affirm ((a1 >> (W_TYPE_SIZE - 1)) == 0);
977 affirm ((b1 >> (W_TYPE_SIZE - 1)) == 0);
978 affirm ((m1 >> (W_TYPE_SIZE - 1)) == 0);
980 /* First compute a0 * <b1, b0> B^{-1}
981 +-----+
982 |a0 b0|
983 +--+--+--+
984 |a0 b1|
985 +--+--+--+
986 |q0 m0|
987 +--+--+--+
988 |q0 m1|
989 -+--+--+--+
990 |r1|r0| 0|
991 +--+--+--+
993 umul_ppmm (t1, t0, a0, b0);
994 umul_ppmm (r1, r0, a0, b1);
995 q = mi * t0;
996 umul_ppmm (p1, p0, q, m0);
997 umul_ppmm (s1, s0, q, m1);
998 r0 += (t0 != 0); /* Carry */
999 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1000 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1001 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1003 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1004 +-----+
1005 |a1 b0|
1006 +--+--+
1007 |r1|r0|
1008 +--+--+--+
1009 |a1 b1|
1010 +--+--+--+
1011 |q1 m0|
1012 +--+--+--+
1013 |q1 m1|
1014 -+--+--+--+
1015 |r1|r0| 0|
1016 +--+--+--+
1018 umul_ppmm (t1, t0, a1, b0);
1019 umul_ppmm (s1, s0, a1, b1);
1020 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1021 q = mi * t0;
1022 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1023 umul_ppmm (p1, p0, q, m0);
1024 umul_ppmm (s1, s0, q, m1);
1025 r0 += (t0 != 0); /* Carry */
1026 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1027 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1028 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1030 if (ge2 (r1, r0, m1, m0))
1031 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1033 *r1p = r1;
1034 return r0;
1037 ATTRIBUTE_CONST
1038 static uintmax_t
1039 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1041 uintmax_t y = one;
1043 if (e & 1)
1044 y = b;
1046 while (e != 0)
1048 b = mulredc (b, b, n, ni);
1049 e >>= 1;
1051 if (e & 1)
1052 y = mulredc (y, b, n, ni);
1055 return y;
1058 ATTRIBUTE_PURE static uuint
1059 powm2 (const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1060 uintmax_t ni, const uintmax_t *one)
1062 uintmax_t r1, r0, b1, b0, n1, n0;
1063 int i;
1064 uintmax_t e;
1066 b0 = bp[0];
1067 b1 = bp[1];
1068 n0 = np[0];
1069 n1 = np[1];
1071 r0 = one[0];
1072 r1 = one[1];
1074 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1076 if (e & 1)
1078 uintmax_t r1m1;
1079 r0 = mulredc2 (&r1m1, r1, r0, b1, b0, n1, n0, ni);
1080 r1 = r1m1;
1082 uintmax_t r1m;
1083 b0 = mulredc2 (&r1m, b1, b0, b1, b0, n1, n0, ni);
1084 b1 = r1m;
1086 for (e = ep[1]; e > 0; e >>= 1)
1088 if (e & 1)
1090 uintmax_t r1m1;
1091 r0 = mulredc2 (&r1m1, r1, r0, b1, b0, n1, n0, ni);
1092 r1 = r1m1;
1094 uintmax_t r1m;
1095 b0 = mulredc2 (&r1m, b1, b0, b1, b0, n1, n0, ni);
1096 b1 = r1m;
1098 return make_uuint (r1, r0);
1101 ATTRIBUTE_CONST
1102 static bool
1103 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1104 int k, uintmax_t one)
1106 uintmax_t y = powm (b, q, n, ni, one);
1108 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1110 if (y == one || y == nm1)
1111 return true;
1113 for (int i = 1; i < k; i++)
1115 y = mulredc (y, y, n, ni);
1117 if (y == nm1)
1118 return true;
1119 if (y == one)
1120 return false;
1122 return false;
1125 ATTRIBUTE_PURE static bool
1126 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1127 const uintmax_t *qp, int k, const uintmax_t *one)
1129 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1131 uuset (&y1, &y0, powm2 (bp, qp, np, ni, one));
1133 if (y0 == one[0] && y1 == one[1])
1134 return true;
1136 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1138 if (y0 == nm1_0 && y1 == nm1_1)
1139 return true;
1141 for (int i = 1; i < k; i++)
1143 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1144 y1 = r1m;
1146 if (y0 == nm1_0 && y1 == nm1_1)
1147 return true;
1148 if (y0 == one[0] && y1 == one[1])
1149 return false;
1151 return false;
1154 static bool
1155 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1156 mpz_srcptr q, mp_bitcnt_t k)
1158 mpz_powm (y, x, q, n);
1160 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1161 return true;
1163 for (mp_bitcnt_t i = 1; i < k; i++)
1165 mpz_powm_ui (y, y, 2, n);
1166 if (mpz_cmp (y, nm1) == 0)
1167 return true;
1168 if (mpz_cmp_ui (y, 1) == 0)
1169 return false;
1171 return false;
1174 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1175 have been observed. The average seem to be about 2. */
1176 static bool ATTRIBUTE_PURE
1177 prime_p (uintmax_t n)
1179 bool is_prime;
1180 uintmax_t a_prim, one, ni;
1181 struct factors factors;
1183 if (n <= 1)
1184 return false;
1186 /* We have already cast out small primes. */
1187 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1188 return true;
1190 /* Precomputation for Miller-Rabin. */
1191 int k = stdc_trailing_zeros (n - 1);
1192 uintmax_t q = (n - 1) >> k;
1194 uintmax_t a = 2;
1195 binv (ni, n); /* ni <- 1/n mod B */
1196 redcify (one, 1, n);
1197 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1199 /* Perform a Miller-Rabin test, finds most composites quickly. */
1200 if (!millerrabin (n, ni, a_prim, q, k, one))
1201 return false;
1203 if (flag_prove_primality)
1205 /* Factor n-1 for Lucas. */
1206 factor (0, n - 1, &factors);
1209 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1210 number composite. */
1211 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1213 if (flag_prove_primality)
1215 is_prime = true;
1216 for (int i = 0; i < factors.nfactors && is_prime; i++)
1218 is_prime
1219 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1222 else
1224 /* After enough Miller-Rabin runs, be content. */
1225 is_prime = (r == MR_REPS - 1);
1228 if (is_prime)
1229 return true;
1231 a += primes_diff[r]; /* Establish new base. */
1233 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1234 on most processors, since it avoids udiv_qrnnd. If we go down the
1235 udiv_qrnnd_preinv path, this code should be replaced. */
1237 uintmax_t s1, s0;
1238 umul_ppmm (s1, s0, one, a);
1239 if (LIKELY (s1 == 0))
1240 a_prim = s0 % n;
1241 else
1243 MAYBE_UNUSED uintmax_t dummy;
1244 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1248 if (!millerrabin (n, ni, a_prim, q, k, one))
1249 return false;
1252 affirm (!"Lucas prime test failure. This should not happen");
1255 static bool ATTRIBUTE_PURE
1256 prime2_p (uintmax_t n1, uintmax_t n0)
1258 uintmax_t q[2], nm1[2];
1259 uintmax_t a_prim[2];
1260 uintmax_t one[2];
1261 uintmax_t na[2];
1262 uintmax_t ni;
1263 int k;
1264 struct factors factors;
1266 if (n1 == 0)
1267 return prime_p (n0);
1269 nm1[1] = n1 - (n0 == 0);
1270 nm1[0] = n0 - 1;
1271 if (nm1[0] == 0)
1273 assume (nm1[1]);
1274 k = stdc_trailing_zeros (nm1[1]);
1276 q[0] = nm1[1] >> k;
1277 q[1] = 0;
1278 k += W_TYPE_SIZE;
1280 else
1282 k = stdc_trailing_zeros (nm1[0]);
1283 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1286 uintmax_t a = 2;
1287 binv (ni, n0);
1288 redcify2 (one[1], one[0], 1, n1, n0);
1289 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1291 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1292 na[0] = n0;
1293 na[1] = n1;
1295 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1296 return false;
1298 if (flag_prove_primality)
1300 /* Factor n-1 for Lucas. */
1301 factor (nm1[1], nm1[0], &factors);
1304 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1305 number composite. */
1306 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1308 bool is_prime;
1309 uintmax_t e[2];
1310 uuint y;
1312 if (flag_prove_primality)
1314 is_prime = true;
1315 if (hi (factors.plarge))
1317 uintmax_t pi;
1318 binv (pi, lo (factors.plarge));
1319 e[0] = pi * nm1[0];
1320 e[1] = 0;
1321 y = powm2 (a_prim, e, na, ni, one);
1322 is_prime = (lo (y) != one[0] || hi (y) != one[1]);
1324 for (int i = 0; i < factors.nfactors && is_prime; i++)
1326 /* FIXME: We always have the factor 2. Do we really need to
1327 handle it here? We have done the same powering as part
1328 of millerrabin. */
1329 if (factors.p[i] == 2)
1330 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1331 else
1332 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1333 y = powm2 (a_prim, e, na, ni, one);
1334 is_prime = (lo (y) != one[0] || hi (y) != one[1]);
1337 else
1339 /* After enough Miller-Rabin runs, be content. */
1340 is_prime = (r == MR_REPS - 1);
1343 if (is_prime)
1344 return true;
1346 a += primes_diff[r]; /* Establish new base. */
1347 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1349 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1350 return false;
1353 affirm (!"Lucas prime test failure. This should not happen");
1356 static bool
1357 mp_prime_p (mpz_t n)
1359 bool is_prime;
1360 mpz_t q, a, nm1, tmp;
1361 struct mp_factors factors;
1363 if (mpz_cmp_ui (n, 1) <= 0)
1364 return false;
1366 /* We have already cast out small primes. */
1367 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1368 return true;
1370 mpz_inits (q, a, nm1, tmp, nullptr);
1372 /* Precomputation for Miller-Rabin. */
1373 mpz_sub_ui (nm1, n, 1);
1375 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1376 mp_bitcnt_t k = mpz_scan1 (nm1, 0);
1377 mpz_tdiv_q_2exp (q, nm1, k);
1379 mpz_set_ui (a, 2);
1381 /* Perform a Miller-Rabin test, finds most composites quickly. */
1382 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1384 is_prime = false;
1385 goto ret2;
1388 if (flag_prove_primality)
1390 /* Factor n-1 for Lucas. */
1391 mpz_set (tmp, nm1);
1392 mp_factor (tmp, &factors);
1395 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1396 number composite. */
1397 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1399 if (flag_prove_primality)
1401 is_prime = true;
1402 for (idx_t i = 0; i < factors.nfactors && is_prime; i++)
1404 mpz_divexact (tmp, nm1, factors.p[i]);
1405 mpz_powm (tmp, a, tmp, n);
1406 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1409 else
1411 /* After enough Miller-Rabin runs, be content. */
1412 is_prime = (r == MR_REPS - 1);
1415 if (is_prime)
1416 goto ret1;
1418 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1420 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1422 is_prime = false;
1423 goto ret1;
1427 affirm (!"Lucas prime test failure. This should not happen");
1429 ret1:
1430 if (flag_prove_primality)
1431 mp_factor_clear (&factors);
1432 ret2:
1433 mpz_clears (q, a, nm1, tmp, nullptr);
1435 return is_prime;
1438 static void
1439 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1440 struct factors *factors)
1442 uintmax_t x, z, y, P, t, ni, g;
1444 unsigned long int k = 1;
1445 unsigned long int l = 1;
1447 redcify (P, 1, n);
1448 addmod (x, P, P, n); /* i.e., redcify(2) */
1449 y = z = x;
1451 while (n != 1)
1453 affirm (a < n);
1455 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1457 for (;;)
1461 x = mulredc (x, x, n, ni);
1462 addmod (x, x, a, n);
1464 submod (t, z, x, n);
1465 P = mulredc (P, t, n, ni);
1467 if (k % 32 == 1)
1469 if (gcd_odd (P, n) != 1)
1470 goto factor_found;
1471 y = x;
1474 while (--k != 0);
1476 z = x;
1477 k = l;
1478 l = 2 * l;
1479 for (unsigned long int i = 0; i < k; i++)
1481 x = mulredc (x, x, n, ni);
1482 addmod (x, x, a, n);
1484 y = x;
1487 factor_found:
1490 y = mulredc (y, y, n, ni);
1491 addmod (y, y, a, n);
1493 submod (t, z, y, n);
1494 g = gcd_odd (t, n);
1496 while (g == 1);
1498 if (n == g)
1500 /* Found n itself as factor. Restart with different params. */
1501 factor_using_pollard_rho (n, a + 1, factors);
1502 return;
1505 n = n / g;
1507 if (!prime_p (g))
1508 factor_using_pollard_rho (g, a + 1, factors);
1509 else
1510 factor_insert (factors, g);
1512 if (prime_p (n))
1514 factor_insert (factors, n);
1515 break;
1518 x = x % n;
1519 z = z % n;
1520 y = y % n;
1524 static void
1525 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1526 struct factors *factors)
1528 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1530 unsigned long int k = 1;
1531 unsigned long int l = 1;
1533 redcify2 (P1, P0, 1, n1, n0);
1534 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1535 y1 = z1 = x1;
1536 y0 = z0 = x0;
1538 while (n1 != 0 || n0 != 1)
1540 binv (ni, n0);
1542 for (;;)
1546 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1547 x1 = r1m;
1548 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1550 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1551 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1552 P1 = r1m;
1554 if (k % 32 == 1)
1556 uuset (&g1, &g0, gcd2_odd (P1, P0, n1, n0));
1557 if (g1 != 0 || g0 != 1)
1558 goto factor_found;
1559 y1 = x1; y0 = x0;
1562 while (--k != 0);
1564 z1 = x1; z0 = x0;
1565 k = l;
1566 l = 2 * l;
1567 for (unsigned long int i = 0; i < k; i++)
1569 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1570 x1 = r1m;
1571 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1573 y1 = x1; y0 = x0;
1576 factor_found:
1579 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1580 y1 = r1m;
1581 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1583 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1584 uuset (&g1, &g0, gcd2_odd (t1, t0, n1, n0));
1586 while (g1 == 0 && g0 == 1);
1588 if (g1 == 0)
1590 /* The found factor is one word, and > 1. */
1591 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1593 if (!prime_p (g0))
1594 factor_using_pollard_rho (g0, a + 1, factors);
1595 else
1596 factor_insert (factors, g0);
1598 else
1600 /* The found factor is two words. This is highly unlikely, thus hard
1601 to trigger. Please be careful before you change this code! */
1602 uintmax_t ginv;
1604 if (n1 == g1 && n0 == g0)
1606 /* Found n itself as factor. Restart with different params. */
1607 factor_using_pollard_rho2 (n1, n0, a + 1, factors);
1608 return;
1611 /* Compute n = n / g. Since the result will fit one word,
1612 we can compute the quotient modulo B, ignoring the high
1613 divisor word. */
1614 binv (ginv, g0);
1615 n0 = ginv * n0;
1616 n1 = 0;
1618 if (!prime2_p (g1, g0))
1619 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1620 else
1621 factor_insert_large (factors, g1, g0);
1624 if (n1 == 0)
1626 if (prime_p (n0))
1628 factor_insert (factors, n0);
1629 break;
1632 factor_using_pollard_rho (n0, a, factors);
1633 return;
1636 if (prime2_p (n1, n0))
1638 factor_insert_large (factors, n1, n0);
1639 break;
1642 uuset (&x1, &x0, mod2 (x1, x0, n1, n0));
1643 uuset (&z1, &z0, mod2 (z1, z0, n1, n0));
1644 uuset (&y1, &y0, mod2 (y1, y0, n1, n0));
1648 static void
1649 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1650 struct mp_factors *factors)
1652 mpz_t x, z, y, P;
1653 mpz_t t, t2;
1655 devmsg ("[pollard-rho (%lu)] ", a);
1657 mpz_inits (t, t2, nullptr);
1658 mpz_init_set_si (y, 2);
1659 mpz_init_set_si (x, 2);
1660 mpz_init_set_si (z, 2);
1661 mpz_init_set_ui (P, 1);
1663 unsigned long long int k = 1;
1664 unsigned long long int l = 1;
1666 while (mpz_cmp_ui (n, 1) != 0)
1668 for (;;)
1672 mpz_mul (t, x, x);
1673 mpz_mod (x, t, n);
1674 mpz_add_ui (x, x, a);
1676 mpz_sub (t, z, x);
1677 mpz_mul (t2, P, t);
1678 mpz_mod (P, t2, n);
1680 if (k % 32 == 1)
1682 mpz_gcd (t, P, n);
1683 if (mpz_cmp_ui (t, 1) != 0)
1684 goto factor_found;
1685 mpz_set (y, x);
1688 while (--k != 0);
1690 mpz_set (z, x);
1691 k = l;
1692 l = 2 * l;
1693 for (unsigned long long int i = 0; i < k; i++)
1695 mpz_mul (t, x, x);
1696 mpz_mod (x, t, n);
1697 mpz_add_ui (x, x, a);
1699 mpz_set (y, x);
1702 factor_found:
1705 mpz_mul (t, y, y);
1706 mpz_mod (y, t, n);
1707 mpz_add_ui (y, y, a);
1709 mpz_sub (t, z, y);
1710 mpz_gcd (t, t, n);
1712 while (mpz_cmp_ui (t, 1) == 0);
1714 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1716 if (!mp_prime_p (t))
1718 devmsg ("[composite factor--restarting pollard-rho] ");
1719 mp_factor_using_pollard_rho (t, a + 1, factors);
1721 else
1723 mp_factor_insert (factors, t);
1726 if (mp_prime_p (n))
1728 mp_factor_insert (factors, n);
1729 break;
1732 mpz_mod (x, x, n);
1733 mpz_mod (z, z, n);
1734 mpz_mod (y, y, n);
1737 mpz_clears (P, t2, t, z, x, y, nullptr);
1740 #if USE_SQUFOF
1741 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1742 algorithm is replaced, consider also returning the remainder. */
1743 ATTRIBUTE_CONST
1744 static uintmax_t
1745 isqrt (uintmax_t n)
1747 if (n == 0)
1748 return 0;
1750 int c = stdc_leading_zeros (n);
1752 /* Make x > sqrt(n). This will be invariant through the loop. */
1753 uintmax_t x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) >> 1);
1755 for (;;)
1757 uintmax_t y = (x + n / x) / 2;
1758 if (y >= x)
1759 return x;
1761 x = y;
1765 ATTRIBUTE_CONST
1766 static uintmax_t
1767 isqrt2 (uintmax_t nh, uintmax_t nl)
1769 /* Ensures the remainder fits in an uintmax_t. */
1770 affirm (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1772 if (nh == 0)
1773 return isqrt (nl);
1775 int shift = stdc_leading_zeros (nh) & ~1;
1777 /* Make x > sqrt (n). */
1778 uintmax_t x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1779 x <<= (W_TYPE_SIZE - shift) >> 1;
1781 /* Do we need more than one iteration? */
1782 for (;;)
1784 MAYBE_UNUSED uintmax_t r;
1785 uintmax_t q, y;
1786 udiv_qrnnd (q, r, nh, nl, x);
1787 y = (x + q) / 2;
1789 if (y >= x)
1791 uintmax_t hi, lo;
1792 umul_ppmm (hi, lo, x + 1, x + 1);
1793 affirm (gt2 (hi, lo, nh, nl));
1795 umul_ppmm (hi, lo, x, x);
1796 affirm (ge2 (nh, nl, hi, lo));
1797 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1798 affirm (hi == 0);
1800 return x;
1803 x = y;
1807 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1808 # define MAGIC64 0x0202021202030213ULL
1809 # define MAGIC63 0x0402483012450293ULL
1810 # define MAGIC65 0x218a019866014613ULL
1811 # define MAGIC11 0x23b
1813 /* Return the square root if the input is a square, otherwise 0. */
1814 ATTRIBUTE_CONST
1815 static uintmax_t
1816 is_square (uintmax_t x)
1818 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1819 computing the square root. */
1820 if (((MAGIC64 >> (x & 63)) & 1)
1821 && ((MAGIC63 >> (x % 63)) & 1)
1822 /* Both 0 and 64 are squares mod (65). */
1823 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1824 && ((MAGIC11 >> (x % 11) & 1)))
1826 uintmax_t r = isqrt (x);
1827 if (r * r == x)
1828 return r;
1830 return 0;
1833 /* invtab[i] = floor (0x10000 / (0x100 + i) */
1834 static short const invtab[0x81] =
1836 0x200,
1837 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1838 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1839 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1840 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1841 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1842 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1843 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1844 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1845 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1846 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1847 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1848 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1849 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1850 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1851 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1852 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1855 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1856 that q < 0x40; here it instead uses a table of (Euclidean) inverses. */
1857 # define div_smallq(q, r, u, d) \
1858 do { \
1859 if ((u) / 0x40 < (d)) \
1861 uintmax_t _dinv, _mask, _q, _r; \
1862 int _cnt = stdc_leading_zeros (d); \
1863 _r = (u); \
1864 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1866 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1867 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1869 else \
1871 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1872 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1874 _r -= _q * (d); \
1876 _mask = -(uintmax_t) (_r >= (d)); \
1877 (r) = _r - (_mask & (d)); \
1878 (q) = _q - _mask; \
1879 affirm ((q) * (d) + (r) == u); \
1881 else \
1883 uintmax_t _q = (u) / (d); \
1884 (r) = (u) - _q * (d); \
1885 (q) = _q; \
1887 } while (0)
1889 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1890 = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025),
1891 with 3025 = 55^2.
1893 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1894 representing G_0 = (-55, 2 * 4652, 8653).
1896 In the notation of the paper:
1898 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1902 t_0 = floor([q_0 + R_0] / S0) = 1
1903 R_1 = t_0 * S_0 - R_0 = 4001
1904 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1907 /* Multipliers, in order of efficiency:
1908 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1909 0.7317 3*5*7 = 105 = 1
1910 0.7820 3*5*11 = 165 = 1
1911 0.7872 3*5 = 15 = 3
1912 0.8101 3*7*11 = 231 = 3
1913 0.8155 3*7 = 21 = 1
1914 0.8284 5*7*11 = 385 = 1
1915 0.8339 5*7 = 35 = 3
1916 0.8716 3*11 = 33 = 1
1917 0.8774 3 = 3 = 3
1918 0.8913 5*11 = 55 = 3
1919 0.8972 5 = 5 = 1
1920 0.9233 7*11 = 77 = 1
1921 0.9295 7 = 7 = 3
1922 0.9934 11 = 11 = 3
1924 # define QUEUE_SIZE 50
1925 #endif
1927 #if STAT_SQUFOF
1928 # define Q_FREQ_SIZE 50
1929 /* Element 0 keeps the total */
1930 static int q_freq[Q_FREQ_SIZE + 1];
1931 #endif
1933 #if USE_SQUFOF
1934 /* Return true on success. Expected to fail only for numbers
1935 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1936 static bool
1937 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1939 /* Uses algorithm and notation from
1941 SQUARE FORM FACTORIZATION
1942 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1944 https://homes.cerias.purdue.edu/~ssw/squfof.pdf
1947 static short const multipliers_1[] =
1948 { /* = 1 (mod 4) */
1949 105, 165, 21, 385, 33, 5, 77, 1, 0
1951 static short const multipliers_3[] =
1952 { /* = 3 (mod 4) */
1953 1155, 15, 231, 35, 3, 55, 7, 11, 0
1956 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1958 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1959 return false;
1961 uintmax_t sqrt_n = isqrt2 (n1, n0);
1963 if (n0 == sqrt_n * sqrt_n)
1965 uintmax_t p1, p0;
1967 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1968 affirm (p0 == n0);
1970 if (n1 == p1)
1972 if (prime_p (sqrt_n))
1973 factor_insert_multiplicity (factors, sqrt_n, 2);
1974 else
1976 struct factors f;
1978 f.nfactors = 0;
1979 if (!factor_using_squfof (0, sqrt_n, &f))
1981 /* Try pollard rho instead */
1982 factor_using_pollard_rho (sqrt_n, 1, &f);
1984 /* Duplicate the new factors */
1985 for (unsigned int i = 0; i < f.nfactors; i++)
1986 factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]);
1988 return true;
1992 /* Select multipliers so we always get n * mu = 3 (mod 4) */
1993 for (short const *m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
1994 *m; m++)
1996 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
1997 unsigned int i;
1998 unsigned int mu = *m;
1999 int qpos = 0;
2001 affirm (mu * n0 % 4 == 3);
2003 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2004 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2005 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2006 mu^3 < n.
2008 However, this seems insufficient: With n = 37243139 and mu =
2009 105, we get a trivial factor, from the square 38809 = 197^2,
2010 without any corresponding Q earlier in the iteration.
2012 Requiring 64 mu^3 < n seems sufficient. */
2013 if (n1 == 0)
2015 if ((uintmax_t) mu * mu * mu >= n0 / 64)
2016 continue;
2018 else
2020 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2021 continue;
2023 umul_ppmm (Dh, Dl, n0, mu);
2024 Dh += n1 * mu;
2026 affirm (Dl % 4 != 1);
2027 affirm (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2029 S = isqrt2 (Dh, Dl);
2031 Q1 = 1;
2032 P = S;
2034 /* Square root remainder fits in one word, so ignore high part. */
2035 Q = Dl - P * P;
2036 /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))? */
2037 L = isqrt (2 * S);
2038 B = 2 * L;
2039 L1 = mu * 2 * L;
2041 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2042 4 D. */
2044 for (i = 0; i <= B; i++)
2046 uintmax_t q, P1, t, rem;
2048 div_smallq (q, rem, S + P, Q);
2049 P1 = S - rem; /* P1 = q*Q - P */
2051 affirm (q > 0 && Q > 0);
2053 # if STAT_SQUFOF
2054 q_freq[0]++;
2055 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2056 # endif
2058 if (Q <= L1)
2060 uintmax_t g = Q;
2062 if ((Q & 1) == 0)
2063 g /= 2;
2065 g /= gcd_odd (g, mu);
2067 if (g <= L)
2069 if (qpos >= QUEUE_SIZE)
2070 error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2071 queue[qpos].Q = g;
2072 queue[qpos].P = P % g;
2073 qpos++;
2077 /* I think the difference can be either sign, but mod
2078 2^W_TYPE_SIZE arithmetic should be fine. */
2079 t = Q1 + q * (P - P1);
2080 Q1 = Q;
2081 Q = t;
2082 P = P1;
2084 if ((i & 1) == 0)
2086 uintmax_t r = is_square (Q);
2087 if (r)
2089 for (int j = 0; j < qpos; j++)
2091 if (queue[j].Q == r)
2093 if (r == 1)
2094 /* Traversed entire cycle. */
2095 goto next_multiplier;
2097 /* Need the absolute value for divisibility test. */
2098 if (P >= queue[j].P)
2099 t = P - queue[j].P;
2100 else
2101 t = queue[j].P - P;
2102 if (t % r == 0)
2104 /* Delete entries up to and including entry
2105 j, which matched. */
2106 memmove (queue, queue + j + 1,
2107 (qpos - j - 1) * sizeof (queue[0]));
2108 qpos -= (j + 1);
2110 goto next_i;
2114 /* We have found a square form, which should give a
2115 factor. */
2116 Q1 = r;
2117 affirm (S >= P); /* What signs are possible? */
2118 P += r * ((S - P) / r);
2120 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2121 for the case D = 2N. */
2122 /* Compute Q = (D - P*P) / Q1, but we need double
2123 precision. */
2124 uintmax_t hi, lo;
2125 umul_ppmm (hi, lo, P, P);
2126 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2127 udiv_qrnnd (Q, rem, hi, lo, Q1);
2128 affirm (rem == 0);
2130 for (;;)
2132 /* Note: There appears to by a typo in the paper,
2133 Step 4a in the algorithm description says q <--
2134 floor([S+P]/\hat Q), but looking at the equations
2135 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2136 (In this code, \hat Q is Q1). */
2137 div_smallq (q, rem, S + P, Q);
2138 P1 = S - rem; /* P1 = q*Q - P */
2140 # if STAT_SQUFOF
2141 q_freq[0]++;
2142 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2143 # endif
2144 if (P == P1)
2145 break;
2146 t = Q1 + q * (P - P1);
2147 Q1 = Q;
2148 Q = t;
2149 P = P1;
2152 if ((Q & 1) == 0)
2153 Q /= 2;
2154 Q /= gcd_odd (Q, mu);
2156 affirm (Q > 1 && (n1 || Q < n0));
2158 if (prime_p (Q))
2159 factor_insert (factors, Q);
2160 else if (!factor_using_squfof (0, Q, factors))
2161 factor_using_pollard_rho (Q, 2, factors);
2163 divexact_21 (n1, n0, n1, n0, Q);
2165 if (prime2_p (n1, n0))
2166 factor_insert_large (factors, n1, n0);
2167 else
2169 if (!factor_using_squfof (n1, n0, factors))
2171 if (n1 == 0)
2172 factor_using_pollard_rho (n0, 1, factors);
2173 else
2174 factor_using_pollard_rho2 (n1, n0, 1, factors);
2178 return true;
2181 next_i:;
2183 next_multiplier:;
2185 return false;
2187 #endif
2189 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2190 results in FACTORS. */
2191 static void
2192 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2194 factors->nfactors = 0;
2195 hiset (&factors->plarge, 0);
2197 if (t1 == 0 && t0 < 2)
2198 return;
2200 uuset (&t1, &t0, factor_using_division (t1, t0, factors));
2202 if (t1 == 0 && t0 < 2)
2203 return;
2205 if (prime2_p (t1, t0))
2206 factor_insert_large (factors, t1, t0);
2207 else
2209 #if USE_SQUFOF
2210 if (factor_using_squfof (t1, t0, factors))
2211 return;
2212 #endif
2214 if (t1 == 0)
2215 factor_using_pollard_rho (t0, 1, factors);
2216 else
2217 factor_using_pollard_rho2 (t1, t0, 1, factors);
2221 /* Use Pollard-rho to compute the prime factors of
2222 arbitrary-precision T, and put the results in FACTORS. */
2223 static void
2224 mp_factor (mpz_t t, struct mp_factors *factors)
2226 mp_factor_init (factors);
2228 if (mpz_sgn (t) != 0)
2230 mp_factor_using_division (t, factors);
2232 if (mpz_cmp_ui (t, 1) != 0)
2234 devmsg ("[is number prime?] ");
2235 if (mp_prime_p (t))
2236 mp_factor_insert (factors, t);
2237 else
2238 mp_factor_using_pollard_rho (t, 1, factors);
2243 static strtol_error
2244 strto2uintmax (uintmax_t *hip, uintmax_t *lop, char const *s)
2246 int lo_carry;
2247 uintmax_t hi = 0, lo = 0;
2249 strtol_error err = LONGINT_INVALID;
2251 /* Initial scan for invalid digits. */
2252 char const *p = s;
2253 for (;;)
2255 unsigned char c = *p++;
2256 if (c == 0)
2257 break;
2259 if (UNLIKELY (!ISDIGIT (c)))
2261 err = LONGINT_INVALID;
2262 break;
2265 err = LONGINT_OK; /* we've seen at least one valid digit */
2268 while (err == LONGINT_OK)
2270 unsigned char c = *s++;
2271 if (c == 0)
2272 break;
2274 c -= '0';
2276 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2278 err = LONGINT_OVERFLOW;
2279 break;
2281 hi = 10 * hi;
2283 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2284 lo_carry += 10 * lo < 2 * lo;
2286 lo = 10 * lo;
2287 lo += c;
2289 lo_carry += lo < c;
2290 hi += lo_carry;
2291 if (UNLIKELY (hi < lo_carry))
2293 err = LONGINT_OVERFLOW;
2294 break;
2298 *hip = hi;
2299 *lop = lo;
2301 return err;
2304 /* FACTOR_PIPE_BUF is chosen to give good performance,
2305 and also is the max guaranteed size that
2306 consumers can read atomically through pipes.
2307 Also it's big enough to cater for max line length
2308 even with 128 bit uintmax_t. */
2309 #ifndef _POSIX_PIPE_BUF
2310 # define _POSIX_PIPE_BUF 512
2311 #endif
2312 #ifdef PIPE_BUF
2313 enum { FACTOR_PIPE_BUF = PIPE_BUF };
2314 #else
2315 enum { FACTOR_PIPE_BUF = _POSIX_PIPE_BUF };
2316 #endif
2318 /* Structure and routines for buffering and outputting full lines, to
2319 support parallel operation efficiently.
2321 The buffer is twice FACTOR_PIPE_BUF so that its second half can
2322 hold the remainder of data that is somewhat too large. Also, the
2323 very end of the second half is used to hold temporary data when
2324 stringifying integers, which is most conveniently done
2325 right-to-left.
2327 Although the buffer's second half doesn't need to be quite so large
2328 - its necessary size is bounded above by roughly the maximum output
2329 line for a uuint plus the string length of a uuint - it'd be a bit
2330 of a pain to figure out exactly how small it can be without causing
2331 trouble. */
2332 static char lbuf_buf[2 * FACTOR_PIPE_BUF];
2333 static idx_t lbuffered;
2335 /* Write complete LBUF to standard output. */
2336 static void
2337 lbuf_flush (void)
2339 idx_t size = lbuffered;
2341 /* Update lbuffered now, to avoid infinite recursion on write error. */
2342 lbuffered = 0;
2344 if (full_write (STDOUT_FILENO, lbuf_buf, size) != size)
2345 write_error ();
2348 /* Write LBUF to standard output.
2349 LBUF should contain at least FACTOR_PIPE_BUF bytes.
2350 If possible, write a prefix of LBUF that is newline terminated
2351 and contains <= FACTOR_PIPE_BUF bytes, so consumers can read atomically.
2352 But if the first FACTOR_PIPE_BUF bytes contain no newlines,
2353 give up on atomicity and just write the first FACTOR_PIPE_BUF bytes. */
2354 static void
2355 lbuf_half_flush (void)
2357 char *nl = memrchr (lbuf_buf, '\n', FACTOR_PIPE_BUF);
2358 char *suffix = nl ? nl + 1 : lbuf_buf + FACTOR_PIPE_BUF;
2359 idx_t prefix_size = suffix - lbuf_buf;
2360 idx_t suffix_size = lbuffered - prefix_size;
2361 lbuffered = prefix_size;
2362 lbuf_flush ();
2363 lbuffered = suffix_size;
2364 memmove (lbuf_buf, suffix, suffix_size);
2367 /* Add a character C to lbuf_buf. */
2368 static void
2369 lbuf_putc (char c)
2371 lbuf_buf[lbuffered++] = c;
2374 /* Add a newline to lbuf_buf. Then, if enough bytes are already
2375 buffered, write the buffer atomically to standard output. */
2376 static void
2377 lbuf_putnl (void)
2379 lbuf_putc ('\n');
2381 /* Provide immediate output for interactive use. */
2382 static int line_buffered = -1;
2383 if (line_buffered < 0)
2384 line_buffered = isatty (STDOUT_FILENO);
2386 if (line_buffered)
2387 lbuf_flush ();
2388 else if (FACTOR_PIPE_BUF <= lbuffered)
2389 lbuf_half_flush ();
2392 /* Append the string representation of I to lbuf_buf, followed by
2393 everything from BUFEND to lbuf_buf's end. Use the area just before
2394 BUFEND temporarily. */
2395 static void
2396 lbuf_putint_append (uintmax_t i, char *bufend)
2398 char *istr = bufend;
2401 *--istr = '0' + i % 10;
2402 i /= 10;
2404 while (i);
2406 char *p = lbuf_buf + lbuffered;
2408 *p++ = *istr++;
2409 while (istr < lbuf_buf + sizeof lbuf_buf);
2411 lbuffered = p - lbuf_buf;
2414 /* Append the string representation of I to lbuf_buf. */
2415 static void
2416 lbuf_putint (uintmax_t i)
2418 return lbuf_putint_append (i, lbuf_buf + sizeof lbuf_buf);
2421 /* Append the string representation of T to lbuf_buf. */
2422 static void
2423 print_uuint (uuint t)
2425 uintmax_t t1 = hi (t), t0 = lo (t);
2426 char *bufend = lbuf_buf + sizeof lbuf_buf;
2428 while (t1)
2430 uintmax_t r = t1 % BIG_POWER_OF_10;
2431 t1 /= BIG_POWER_OF_10;
2432 udiv_qrnnd (t0, r, r, t0, BIG_POWER_OF_10);
2433 for (int i = 0; i < LOG_BIG_POWER_OF_10; i++)
2435 *--bufend = '0' + r % 10;
2436 r /= 10;
2440 lbuf_putint_append (t0, bufend);
2443 /* Buffer an mpz to the internal LBUF, possibly writing if it is long. */
2444 static void
2445 lbuf_putmpz (mpz_t const i)
2447 idx_t sizeinbase = mpz_sizeinbase (i, 10);
2448 char *lbuf_bufend = lbuf_buf + sizeof lbuf_buf;
2449 char *p = lbuf_buf + lbuffered;
2450 if (sizeinbase < lbuf_bufend - p)
2452 mpz_get_str (p, 10, i);
2453 p += sizeinbase;
2454 lbuffered = p - !p[-1] - lbuf_buf;
2455 while (FACTOR_PIPE_BUF <= lbuffered)
2456 lbuf_half_flush ();
2458 else
2460 lbuf_flush ();
2461 char *istr = ximalloc (sizeinbase + 1);
2462 mpz_get_str (istr, 10, i);
2463 idx_t istrlen = sizeinbase - !istr[sizeinbase - 1];
2464 if (full_write (STDOUT_FILENO, istr, istrlen) != istrlen)
2465 write_error ();
2466 free (istr);
2470 /* Single-precision factoring */
2471 static void
2472 print_factors_single (uintmax_t t1, uintmax_t t0)
2474 struct factors factors;
2476 print_uuint (make_uuint (t1, t0));
2477 lbuf_putc (':');
2479 factor (t1, t0, &factors);
2481 for (int j = 0; j < factors.nfactors; j++)
2482 for (int k = 0; k < factors.e[j]; k++)
2484 lbuf_putc (' ');
2485 print_uuint (make_uuint (0, factors.p[j]));
2486 if (print_exponents && factors.e[j] > 1)
2488 lbuf_putc ('^');
2489 lbuf_putint (factors.e[j]);
2490 break;
2494 if (hi (factors.plarge))
2496 lbuf_putc (' ');
2497 print_uuint (factors.plarge);
2500 lbuf_putnl ();
2503 /* Emit the factors of the indicated number. If we have the option of using
2504 either algorithm, we select on the basis of the length of the number.
2505 For longer numbers, we prefer the MP algorithm even if the native algorithm
2506 has enough digits, because the algorithm is better. The turnover point
2507 depends on the value. */
2508 static bool
2509 print_factors (char const *input)
2511 /* Skip initial spaces and '+'. */
2512 char const *str = input;
2513 while (*str == ' ')
2514 str++;
2515 str += *str == '+';
2517 uintmax_t t1, t0;
2519 /* Try converting the number to one or two words. If it fails, use GMP or
2520 print an error message. The 2nd condition checks that the most
2521 significant bit of the two-word number is clear, in a typesize neutral
2522 way. */
2523 strtol_error err = strto2uintmax (&t1, &t0, str);
2525 switch (err)
2527 case LONGINT_OK:
2528 if (((t1 << 1) >> 1) == t1)
2530 devmsg ("[using single-precision arithmetic] ");
2531 print_factors_single (t1, t0);
2532 return true;
2534 break;
2536 case LONGINT_OVERFLOW:
2537 /* Try GMP. */
2538 break;
2540 default:
2541 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2542 return false;
2545 devmsg ("[using arbitrary-precision arithmetic] ");
2546 mpz_t t;
2547 struct mp_factors factors;
2549 mpz_init_set_str (t, str, 10);
2551 lbuf_putmpz (t);
2552 lbuf_putc (':');
2553 mp_factor (t, &factors);
2555 for (idx_t j = 0; j < factors.nfactors; j++)
2556 for (unsigned long int k = 0; k < factors.e[j]; k++)
2558 lbuf_putc (' ');
2559 lbuf_putmpz (factors.p[j]);
2560 if (print_exponents && factors.e[j] > 1)
2562 lbuf_putc ('^');
2563 lbuf_putint (factors.e[j]);
2564 break;
2568 mp_factor_clear (&factors);
2569 mpz_clear (t);
2570 lbuf_putnl ();
2571 return true;
2574 void
2575 usage (int status)
2577 if (status != EXIT_SUCCESS)
2578 emit_try_help ();
2579 else
2581 printf (_("\
2582 Usage: %s [OPTION] [NUMBER]...\n\
2584 program_name);
2585 fputs (_("\
2586 Print the prime factors of each specified integer NUMBER. If none\n\
2587 are specified on the command line, read them from standard input.\n\
2589 "), stdout);
2590 fputs ("\
2591 -h, --exponents print repeated factors in form p^e unless e is 1\n\
2592 ", stdout);
2593 fputs (HELP_OPTION_DESCRIPTION, stdout);
2594 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2595 emit_ancillary_info (PROGRAM_NAME);
2597 exit (status);
2600 static bool
2601 do_stdin (void)
2603 bool ok = true;
2604 token_buffer tokenbuffer;
2606 init_tokenbuffer (&tokenbuffer);
2608 while (true)
2610 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2611 &tokenbuffer);
2612 if (token_length == (size_t) -1)
2614 if (ferror (stdin))
2615 error (EXIT_FAILURE, errno, _("error reading input"));
2616 break;
2619 ok &= print_factors (tokenbuffer.buffer);
2621 free (tokenbuffer.buffer);
2623 return ok;
2627 main (int argc, char **argv)
2629 initialize_main (&argc, &argv);
2630 set_program_name (argv[0]);
2631 setlocale (LC_ALL, "");
2632 bindtextdomain (PACKAGE, LOCALEDIR);
2633 textdomain (PACKAGE);
2635 atexit (close_stdout);
2637 int c;
2638 while ((c = getopt_long (argc, argv, "h", long_options, nullptr)) != -1)
2640 switch (c)
2642 case 'h': /* NetBSD used -h for this functionality first. */
2643 print_exponents = true;
2644 break;
2646 case DEV_DEBUG_OPTION:
2647 dev_debug = true;
2648 break;
2650 case_GETOPT_HELP_CHAR;
2652 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2654 default:
2655 usage (EXIT_FAILURE);
2659 atexit (lbuf_flush);
2661 #if STAT_SQUFOF
2662 memset (q_freq, 0, sizeof (q_freq));
2663 #endif
2665 bool ok;
2666 if (argc <= optind)
2667 ok = do_stdin ();
2668 else
2670 ok = true;
2671 for (int i = optind; i < argc; i++)
2672 if (! print_factors (argv[i]))
2673 ok = false;
2676 #if STAT_SQUFOF
2677 if (q_freq[0] > 0)
2679 double acc_f;
2680 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2681 for (int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2683 double f = (double) q_freq[i] / q_freq[0];
2684 acc_f += f;
2685 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2686 100.0 * f, 100.0 * acc_f);
2689 #endif
2691 return ok ? EXIT_SUCCESS : EXIT_FAILURE;