factor: mod2 now returns uuint
[coreutils.git] / src / factor.c
blob92cd7de2ddb886e0b46f7180fc1dbcbee23bd8fe
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organization:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Algorithm:
40 (1) Perform trial division using a small primes table, but without hardware
41 division since the primes table store inverses modulo the word base.
42 (The GMP variant of this code doesn't make use of the precomputed
43 inverses, but instead relies on GMP for fast divisibility testing.)
44 (2) Check the nature of any non-factored part using Miller-Rabin for
45 detecting composites, and Lucas for detecting primes.
46 (3) Factor any remaining composite part using the Pollard-Brent rho
47 algorithm or if USE_SQUFOF is defined to 1, try that first.
48 Status of found factors are checked again using Miller-Rabin and Lucas.
50 We prefer using Hensel norm in the divisions, not the more familiar
51 Euclidean norm, since the former leads to much faster code. In the
52 Pollard-Brent rho code and the prime testing code, we use Montgomery's
53 trick of multiplying all n-residues by the word base, allowing cheap Hensel
54 reductions mod n.
56 The GMP code uses an algorithm that can be considerably slower;
57 for example, on a circa-2017 Intel Xeon Silver 4116, factoring
58 2^{127}-3 takes about 50 ms with the two-word algorithm but would
59 take about 750 ms with the GMP code.
61 Improvements:
63 * Use modular inverses also for exact division in the Lucas code, and
64 elsewhere. A problem is to locate the inverses not from an index, but
65 from a prime. We might instead compute the inverse on-the-fly.
67 * Tune trial division table size (not forgetting that this is a standalone
68 program where the table will be read from secondary storage for
69 each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidean)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methods. These functions currently dominate run time when using
84 the -w option.
87 /* Whether to recursively factor to prove primality,
88 or run faster probabilistic tests. */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
93 /* Faster for certain ranges but less general. */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
98 /* Output SQUFOF statistics. */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdbit.h>
107 #include <stdio.h>
108 #include <gmp.h>
110 #include "system.h"
111 #include "assure.h"
112 #include "full-write.h"
113 #include "quote.h"
114 #include "readtokens.h"
115 #include "xstrtol.h"
117 /* The official name of this program (e.g., no 'g' prefix). */
118 #define PROGRAM_NAME "factor"
120 #define AUTHORS \
121 proper_name ("Paul Rubin"), \
122 proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
123 proper_name_lite ("Niels Moller", "Niels M\303\266ller")
125 /* Token delimiters when reading from a file. */
126 #define DELIM "\n\t "
128 #ifndef USE_LONGLONG_H
129 /* With the way we use longlong.h, it's only safe to use
130 when UWtype = UHWtype, as there were various cases
131 (as can be seen in the history for longlong.h) where
132 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
133 to avoid compile time or run time issues. */
134 # if LONG_MAX == INTMAX_MAX
135 # define USE_LONGLONG_H 1
136 # endif
137 #endif
139 #if USE_LONGLONG_H
141 /* Make definitions for longlong.h to make it do what it can do for us */
143 /* bitcount for uintmax_t */
144 # if UINTMAX_MAX == UINT32_MAX
145 # define W_TYPE_SIZE 32
146 # elif UINTMAX_MAX == UINT64_MAX
147 # define W_TYPE_SIZE 64
148 # elif UINTMAX_MAX == UINT128_MAX
149 # define W_TYPE_SIZE 128
150 # endif
152 # define UWtype uintmax_t
153 # define UHWtype unsigned long int
154 # undef UDWtype
155 # if HAVE_ATTRIBUTE_MODE
156 typedef unsigned int UQItype __attribute__ ((mode (QI)));
157 typedef int SItype __attribute__ ((mode (SI)));
158 typedef unsigned int USItype __attribute__ ((mode (SI)));
159 typedef int DItype __attribute__ ((mode (DI)));
160 typedef unsigned int UDItype __attribute__ ((mode (DI)));
161 # else
162 typedef unsigned char UQItype;
163 typedef long SItype;
164 typedef unsigned long int USItype;
165 # if HAVE_LONG_LONG_INT
166 typedef long long int DItype;
167 typedef unsigned long long int UDItype;
168 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
169 typedef long int DItype;
170 typedef unsigned long int UDItype;
171 # endif
172 # endif
173 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
174 # define ASSERT(x) /* FIXME make longlong.h really standalone */
175 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
176 # ifndef __GMP_GNUC_PREREQ
177 # define __GMP_GNUC_PREREQ(a,b) 1
178 # endif
180 /* longlong.h uses these macros only in certain system compiler combinations.
181 Ensure usage to pacify -Wunused-macros. */
182 #if defined ASSERT || defined UHWtype || defined __GMP_DECLSPEC
183 #endif
185 # if _ARCH_PPC
186 # define HAVE_HOST_CPU_FAMILY_powerpc 1
187 # endif
188 # include "longlong.h"
190 #else /* not USE_LONGLONG_H */
192 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
193 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
194 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
195 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
197 #endif
199 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
200 #define MAX_NFACTS 26
202 enum
204 DEV_DEBUG_OPTION = CHAR_MAX + 1
207 static struct option const long_options[] =
209 {"exponents", no_argument, nullptr, 'h'},
210 {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION},
211 {GETOPT_HELP_OPTION_DECL},
212 {GETOPT_VERSION_OPTION_DECL},
213 {nullptr, 0, nullptr, 0}
216 /* If true, use p^e output format. */
217 static bool print_exponents;
219 /* This represents an unsigned integer twice as wide as uintmax_t. */
220 typedef struct { uintmax_t uu[2]; } uuint;
222 /* Accessors and constructors for the type. Pprograms should not
223 access the type's internals directly, in case some future version
224 replaces the type with unsigned __int128 or whatever. */
225 static uintmax_t lo (uuint u) { return u.uu[0]; }
226 static uintmax_t hi (uuint u) { return u.uu[1]; }
227 static void hiset (uuint *u, uintmax_t hi) { u->uu[1] = hi; }
228 static void
229 uuset (uintmax_t *phi, uintmax_t *plo, uuint uu)
231 *phi = hi (uu);
232 *plo = lo (uu);
234 static uuint
235 make_uuint (uintmax_t hi, uintmax_t lo)
237 return (uuint) {{lo, hi}};
240 struct factors
242 uuint plarge; /* Can have a single large factor */
243 uintmax_t p[MAX_NFACTS];
244 unsigned char e[MAX_NFACTS];
245 unsigned char nfactors;
248 struct mp_factors
250 mpz_t *p;
251 unsigned long int *e;
252 idx_t nfactors;
253 idx_t nalloc;
256 static void factor (uintmax_t, uintmax_t, struct factors *);
258 #ifndef umul_ppmm
259 # define umul_ppmm(w1, w0, u, v) \
260 do { \
261 uintmax_t __x0, __x1, __x2, __x3; \
262 unsigned long int __ul, __vl, __uh, __vh; \
263 uintmax_t __u = (u), __v = (v); \
265 __ul = __ll_lowpart (__u); \
266 __uh = __ll_highpart (__u); \
267 __vl = __ll_lowpart (__v); \
268 __vh = __ll_highpart (__v); \
270 __x0 = (uintmax_t) __ul * __vl; \
271 __x1 = (uintmax_t) __ul * __vh; \
272 __x2 = (uintmax_t) __uh * __vl; \
273 __x3 = (uintmax_t) __uh * __vh; \
275 __x1 += __ll_highpart (__x0);/* This can't give carry. */ \
276 __x1 += __x2; /* But this indeed can. */ \
277 if (__x1 < __x2) /* Did we get it? */ \
278 __x3 += __ll_B; /* Yes, add it in the proper pos. */ \
280 (w1) = __x3 + __ll_highpart (__x1); \
281 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
282 } while (0)
283 #endif
285 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
286 /* Define our own, not needing normalization. This function is
287 currently not performance critical, so keep it simple. Similar to
288 the mod macro below. */
289 # undef udiv_qrnnd
290 # define udiv_qrnnd(q, r, n1, n0, d) \
291 do { \
292 uintmax_t __d1, __d0, __q, __r1, __r0; \
294 __d1 = (d); __d0 = 0; \
295 __r1 = (n1); __r0 = (n0); \
296 affirm (__r1 < __d1); \
297 __q = 0; \
298 for (int __i = W_TYPE_SIZE; __i > 0; __i--) \
300 rsh2 (__d1, __d0, __d1, __d0, 1); \
301 __q <<= 1; \
302 if (ge2 (__r1, __r0, __d1, __d0)) \
304 __q++; \
305 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
308 (r) = __r0; \
309 (q) = __q; \
310 } while (0)
311 #endif
313 #if !defined add_ssaaaa
314 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
315 do { \
316 uintmax_t _add_x; \
317 _add_x = (al) + (bl); \
318 (sh) = (ah) + (bh) + (_add_x < (al)); \
319 (sl) = _add_x; \
320 } while (0)
321 #endif
323 /* Set (rh,rl) = (ah,al) >> cnt, where 0 < cnt < W_TYPE_SIZE. */
324 #define rsh2(rh, rl, ah, al, cnt) \
325 do { \
326 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
327 (rh) = (ah) >> (cnt); \
328 } while (0)
330 /* Set (rh,rl) = (ah,al) << cnt, where 0 < cnt < W_TYPE_SIZE. */
331 #define lsh2(rh, rl, ah, al, cnt) \
332 do { \
333 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
334 (rl) = (al) << (cnt); \
335 } while (0)
337 #define ge2(ah, al, bh, bl) \
338 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
340 #define gt2(ah, al, bh, bl) \
341 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
343 #ifndef sub_ddmmss
344 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
345 do { \
346 uintmax_t _cy; \
347 _cy = (al) < (bl); \
348 (rl) = (al) - (bl); \
349 (rh) = (ah) - (bh) - _cy; \
350 } while (0)
351 #endif
353 /* Requires that a < n and b <= n */
354 #define submod(r,a,b,n) \
355 do { \
356 uintmax_t _t = - (uintmax_t) (a < b); \
357 (r) = ((n) & _t) + (a) - (b); \
358 } while (0)
360 #define addmod(r,a,b,n) \
361 submod ((r), (a), ((n) - (b)), (n))
363 /* Modular two-word addition and subtraction. For performance reasons, the
364 most significant bit of n1 must be clear. The destination variables must be
365 distinct from the mod operand. */
366 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
367 do { \
368 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
369 if (ge2 ((r1), (r0), (n1), (n0))) \
370 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
371 } while (0)
372 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
373 do { \
374 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
375 if ((intmax_t) (r1) < 0) \
376 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
377 } while (0)
379 #define HIGHBIT_TO_MASK(x) \
380 (((intmax_t)-1 >> 1) < 0 \
381 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
382 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
383 ? UINTMAX_MAX : (uintmax_t) 0))
385 /* Return r = a mod d, where a = <a1,a0>, d = <d1,d0>.
386 Requires that d1 != 0. */
387 ATTRIBUTE_PURE static uuint
388 mod2 (uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
390 affirm (d1 != 0);
392 if (a1)
394 int cntd = stdc_leading_zeros (d1);
395 int cnta = stdc_leading_zeros (a1);
396 int cnt = cntd - cnta;
397 if (0 < cnt)
399 lsh2 (d1, d0, d1, d0, cnt);
400 for (int i = 0; i < cnt; i++)
402 if (ge2 (a1, a0, d1, d0))
403 sub_ddmmss (a1, a0, a1, a0, d1, d0);
404 rsh2 (d1, d0, d1, d0, 1);
409 return make_uuint (a1, a0);
412 ATTRIBUTE_CONST
413 static uintmax_t
414 gcd_odd (uintmax_t a, uintmax_t b)
416 if ((b & 1) == 0)
418 uintmax_t t = b;
419 b = a;
420 a = t;
422 if (a == 0)
423 return b;
425 /* Take out least significant one bit, to make room for sign */
426 b >>= 1;
428 for (;;)
430 uintmax_t t;
431 uintmax_t bgta;
433 assume (a);
434 a >>= stdc_trailing_zeros (a);
435 a >>= 1;
437 t = a - b;
438 if (t == 0)
439 return (a << 1) + 1;
441 bgta = HIGHBIT_TO_MASK (t);
443 /* b <-- min (a, b) */
444 b += (bgta & t);
446 /* a <-- |a - b| */
447 a = (t ^ bgta) - bgta;
451 static uintmax_t
452 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
454 affirm (b0 & 1);
456 if ((a0 | a1) == 0)
458 *r1 = b1;
459 return b0;
461 if (!a0)
462 a0 = a1, a1 = 0;
463 assume (a0);
464 int ctz = stdc_trailing_zeros (a0);
465 if (ctz)
466 rsh2 (a1, a0, a1, a0, ctz);
468 for (;;)
470 if ((b1 | a1) == 0)
472 *r1 = 0;
473 return gcd_odd (b0, a0);
476 if (gt2 (a1, a0, b1, b0))
478 sub_ddmmss (a1, a0, a1, a0, b1, b0);
479 if (!a0)
480 a0 = a1, a1 = 0;
481 assume (a0);
482 ctz = stdc_trailing_zeros (a0);
483 if (ctz)
484 rsh2 (a1, a0, a1, a0, ctz);
486 else if (gt2 (b1, b0, a1, a0))
488 sub_ddmmss (b1, b0, b1, b0, a1, a0);
489 if (!b0)
490 b0 = b1, b1 = 0;
491 assume (b0);
492 ctz = stdc_trailing_zeros (b0);
493 if (ctz)
494 rsh2 (b1, b0, b1, b0, ctz);
496 else
497 break;
500 *r1 = a1;
501 return a0;
504 static void
505 factor_insert_multiplicity (struct factors *factors,
506 uintmax_t prime, int m)
508 int nfactors = factors->nfactors;
509 uintmax_t *p = factors->p;
510 unsigned char *e = factors->e;
512 /* Locate position for insert new or increment e. */
513 int i;
514 for (i = nfactors - 1; i >= 0; i--)
516 if (p[i] <= prime)
517 break;
520 if (i < 0 || p[i] != prime)
522 for (int j = nfactors - 1; j > i; j--)
524 p[j + 1] = p[j];
525 e[j + 1] = e[j];
527 p[i + 1] = prime;
528 e[i + 1] = m;
529 factors->nfactors = nfactors + 1;
531 else
533 e[i] += m;
537 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
539 static void
540 factor_insert_large (struct factors *factors,
541 uintmax_t p1, uintmax_t p0)
543 if (p1 > 0)
545 affirm (hi (factors->plarge) == 0);
546 factors->plarge = make_uuint (p1, p0);
548 else
549 factor_insert (factors, p0);
552 #ifndef mpz_inits
554 # include <stdarg.h>
556 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
557 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
559 static void
560 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
562 va_list ap;
564 va_start (ap, mpz_single_init);
566 mpz_t *mpz;
567 while ((mpz = va_arg (ap, mpz_t *)))
568 mpz_single_init (*mpz);
570 va_end (ap);
572 #endif
574 static void mp_factor (mpz_t, struct mp_factors *);
576 static void
577 mp_factor_init (struct mp_factors *factors)
579 factors->p = nullptr;
580 factors->e = nullptr;
581 factors->nfactors = 0;
582 factors->nalloc = 0;
585 static void
586 mp_factor_clear (struct mp_factors *factors)
588 for (idx_t i = 0; i < factors->nfactors; i++)
589 mpz_clear (factors->p[i]);
591 free (factors->p);
592 free (factors->e);
595 static void
596 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
598 idx_t nfactors = factors->nfactors;
599 mpz_t *p = factors->p;
600 unsigned long int *e = factors->e;
601 ptrdiff_t i;
603 /* Locate position for insert new or increment e. */
604 for (i = nfactors - 1; i >= 0; i--)
606 if (mpz_cmp (p[i], prime) <= 0)
607 break;
610 if (i < 0 || mpz_cmp (p[i], prime) != 0)
612 if (factors->nfactors == factors->nalloc)
614 p = xpalloc (p, &factors->nalloc, 1, -1, sizeof *p);
615 e = xireallocarray (e, factors->nalloc, sizeof *e);
618 mpz_init (p[nfactors]);
619 for (ptrdiff_t j = nfactors - 1; j > i; j--)
621 mpz_set (p[j + 1], p[j]);
622 e[j + 1] = e[j];
624 mpz_set (p[i + 1], prime);
625 e[i + 1] = 1;
627 factors->p = p;
628 factors->e = e;
629 factors->nfactors = nfactors + 1;
631 else
633 e[i] += 1;
637 static void
638 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
640 mpz_t pz;
642 mpz_init_set_ui (pz, prime);
643 mp_factor_insert (factors, pz);
644 mpz_clear (pz);
648 /* Number of bits in an uintmax_t. */
649 enum { W = sizeof (uintmax_t) * CHAR_BIT };
651 /* Verify that uintmax_t does not have holes in its representation. */
652 static_assert (UINTMAX_MAX >> (W - 1) == 1);
654 #define P(a,b,c,d) a,
655 static const unsigned char primes_diff[] = {
656 #include "primes.h"
657 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
659 #undef P
661 #define PRIMES_PTAB_ENTRIES \
662 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
664 #define P(a,b,c,d) b,
665 static const unsigned char primes_diff8[] = {
666 #include "primes.h"
667 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
669 #undef P
671 struct primes_dtab
673 uintmax_t binv, lim;
676 #define P(a,b,c,d) {c,d},
677 static const struct primes_dtab primes_dtab[] = {
678 #include "primes.h"
679 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
681 #undef P
683 /* Verify that uintmax_t is not wider than
684 the integers used to generate primes.h. */
685 static_assert (W <= WIDE_UINT_BITS);
687 /* debugging for developers. Enables devmsg().
688 This flag is used only in the GMP code. */
689 static bool dev_debug = false;
691 /* Prove primality or run probabilistic tests. */
692 static bool flag_prove_primality = PROVE_PRIMALITY;
694 /* Number of Miller-Rabin tests to run when not proving primality. */
695 #define MR_REPS 25
697 static void
698 factor_insert_refind (struct factors *factors, uintmax_t p, int i, int off)
700 for (int j = 0; j < off; j++)
701 p += primes_diff[i + j];
702 factor_insert (factors, p);
705 /* Trial division with odd primes uses the following trick.
707 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
708 consider the case t < B (this is the second loop below).
710 From our tables we get
712 binv = p^{-1} (mod B)
713 lim = floor ((B-1) / p).
715 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
716 (and all quotients in this range occur for some t).
718 Then t = q * p is true also (mod B), and p is invertible we get
720 q = t * binv (mod B).
722 Next, assume that t is *not* divisible by p. Since multiplication
723 by binv (mod B) is a one-to-one mapping,
725 t * binv (mod B) > lim,
727 because all the smaller values are already taken.
729 This can be summed up by saying that the function
731 q(t) = binv * t (mod B)
733 is a permutation of the range 0 <= t < B, with the curious property
734 that it maps the multiples of p onto the range 0 <= q <= lim, in
735 order, and the non-multiples of p onto the range lim < q < B.
738 static uintmax_t
739 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
740 struct factors *factors)
742 if (t0 % 2 == 0)
744 int cnt;
746 if (t0 == 0)
748 assume (t1);
749 cnt = stdc_trailing_zeros (t1);
750 t0 = t1 >> cnt;
751 t1 = 0;
752 cnt += W_TYPE_SIZE;
754 else
756 cnt = stdc_trailing_zeros (t0);
757 rsh2 (t1, t0, t1, t0, cnt);
760 factor_insert_multiplicity (factors, 2, cnt);
763 uintmax_t p = 3;
764 idx_t i;
765 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
767 for (;;)
769 uintmax_t q1, q0, hi;
770 MAYBE_UNUSED uintmax_t lo;
772 q0 = t0 * primes_dtab[i].binv;
773 umul_ppmm (hi, lo, q0, p);
774 if (hi > t1)
775 break;
776 hi = t1 - hi;
777 q1 = hi * primes_dtab[i].binv;
778 if (LIKELY (q1 > primes_dtab[i].lim))
779 break;
780 t1 = q1; t0 = q0;
781 factor_insert (factors, p);
783 p += primes_diff[i + 1];
785 if (t1p)
786 *t1p = t1;
788 #define DIVBLOCK(I) \
789 do { \
790 for (;;) \
792 q = t0 * pd[I].binv; \
793 if (LIKELY (q > pd[I].lim)) \
794 break; \
795 t0 = q; \
796 factor_insert_refind (factors, p, i + 1, I); \
798 } while (0)
800 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
802 uintmax_t q;
803 const struct primes_dtab *pd = &primes_dtab[i];
804 DIVBLOCK (0);
805 DIVBLOCK (1);
806 DIVBLOCK (2);
807 DIVBLOCK (3);
808 DIVBLOCK (4);
809 DIVBLOCK (5);
810 DIVBLOCK (6);
811 DIVBLOCK (7);
813 p += primes_diff8[i];
814 if (p * p > t0)
815 break;
818 return t0;
821 static void
822 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
824 mpz_t q;
825 mp_bitcnt_t p;
827 devmsg ("[trial division] ");
829 mpz_init (q);
831 p = mpz_scan1 (t, 0);
832 mpz_fdiv_q_2exp (t, t, p);
833 while (p)
835 mp_factor_insert_ui (factors, 2);
836 --p;
839 unsigned long int d = 3;
840 for (idx_t i = 1; i <= PRIMES_PTAB_ENTRIES;)
842 if (! mpz_divisible_ui_p (t, d))
844 d += primes_diff[i++];
845 if (mpz_cmp_ui (t, d * d) < 0)
846 break;
848 else
850 mpz_tdiv_q_ui (t, t, d);
851 mp_factor_insert_ui (factors, d);
855 mpz_clear (q);
858 /* Entry i contains (2i+1)^(-1) mod 2^8. */
859 static const unsigned char binvert_table[128] =
861 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
862 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
863 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
864 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
865 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
866 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
867 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
868 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
869 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
870 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
871 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
872 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
873 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
874 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
875 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
876 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
879 /* Compute n^(-1) mod B, using a Newton iteration. */
880 #define binv(inv,n) \
881 do { \
882 uintmax_t __n = (n); \
883 uintmax_t __inv; \
885 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
886 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
887 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
888 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
890 if (W_TYPE_SIZE > 64) \
892 int __invbits = 64; \
893 do { \
894 __inv = 2 * __inv - __inv * __inv * __n; \
895 __invbits *= 2; \
896 } while (__invbits < W_TYPE_SIZE); \
899 (inv) = __inv; \
900 } while (0)
902 /* q = u / d, assuming d|u. */
903 #define divexact_21(q1, q0, u1, u0, d) \
904 do { \
905 uintmax_t _di, _q0; \
906 binv (_di, (d)); \
907 _q0 = (u0) * _di; \
908 if ((u1) >= (d)) \
910 uintmax_t _p1; \
911 MAYBE_UNUSED intmax_t _p0; \
912 umul_ppmm (_p1, _p0, _q0, d); \
913 (q1) = ((u1) - _p1) * _di; \
914 (q0) = _q0; \
916 else \
918 (q0) = _q0; \
919 (q1) = 0; \
921 } while (0)
923 /* x B (mod n). */
924 #define redcify(r_prim, r, n) \
925 do { \
926 MAYBE_UNUSED uintmax_t _redcify_q; \
927 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
928 } while (0)
930 /* x B^2 (mod n). Requires x > 0, n1 < B/2. */
931 #define redcify2(r1, r0, x, n1, n0) \
932 do { \
933 uintmax_t _r1, _r0, _i; \
934 if ((x) < (n1)) \
936 _r1 = (x); _r0 = 0; \
937 _i = W_TYPE_SIZE; \
939 else \
941 _r1 = 0; _r0 = (x); \
942 _i = 2 * W_TYPE_SIZE; \
944 while (_i-- > 0) \
946 lsh2 (_r1, _r0, _r1, _r0, 1); \
947 if (ge2 (_r1, _r0, (n1), (n0))) \
948 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
950 (r1) = _r1; \
951 (r0) = _r0; \
952 } while (0)
954 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
955 Both a and b must be in redc form, the result will be in redc form too. */
956 static inline uintmax_t
957 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
959 uintmax_t rh, rl, q, th, xh;
960 MAYBE_UNUSED uintmax_t tl;
962 umul_ppmm (rh, rl, a, b);
963 q = rl * mi;
964 umul_ppmm (th, tl, q, m);
965 xh = rh - th;
966 if (rh < th)
967 xh += m;
969 return xh;
972 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
973 Both a and b must be in redc form, the result will be in redc form too.
974 For performance reasons, the most significant bit of m must be clear. */
975 static uintmax_t
976 mulredc2 (uintmax_t *r1p,
977 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
978 uintmax_t m1, uintmax_t m0, uintmax_t mi)
980 uintmax_t r1, r0, q, p1, t1, t0, s1, s0;
981 MAYBE_UNUSED uintmax_t p0;
982 mi = -mi;
983 affirm ((a1 >> (W_TYPE_SIZE - 1)) == 0);
984 affirm ((b1 >> (W_TYPE_SIZE - 1)) == 0);
985 affirm ((m1 >> (W_TYPE_SIZE - 1)) == 0);
987 /* First compute a0 * <b1, b0> B^{-1}
988 +-----+
989 |a0 b0|
990 +--+--+--+
991 |a0 b1|
992 +--+--+--+
993 |q0 m0|
994 +--+--+--+
995 |q0 m1|
996 -+--+--+--+
997 |r1|r0| 0|
998 +--+--+--+
1000 umul_ppmm (t1, t0, a0, b0);
1001 umul_ppmm (r1, r0, a0, b1);
1002 q = mi * t0;
1003 umul_ppmm (p1, p0, q, m0);
1004 umul_ppmm (s1, s0, q, m1);
1005 r0 += (t0 != 0); /* Carry */
1006 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1007 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1008 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1010 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1011 +-----+
1012 |a1 b0|
1013 +--+--+
1014 |r1|r0|
1015 +--+--+--+
1016 |a1 b1|
1017 +--+--+--+
1018 |q1 m0|
1019 +--+--+--+
1020 |q1 m1|
1021 -+--+--+--+
1022 |r1|r0| 0|
1023 +--+--+--+
1025 umul_ppmm (t1, t0, a1, b0);
1026 umul_ppmm (s1, s0, a1, b1);
1027 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1028 q = mi * t0;
1029 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1030 umul_ppmm (p1, p0, q, m0);
1031 umul_ppmm (s1, s0, q, m1);
1032 r0 += (t0 != 0); /* Carry */
1033 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1034 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1035 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1037 if (ge2 (r1, r0, m1, m0))
1038 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1040 *r1p = r1;
1041 return r0;
1044 ATTRIBUTE_CONST
1045 static uintmax_t
1046 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1048 uintmax_t y = one;
1050 if (e & 1)
1051 y = b;
1053 while (e != 0)
1055 b = mulredc (b, b, n, ni);
1056 e >>= 1;
1058 if (e & 1)
1059 y = mulredc (y, b, n, ni);
1062 return y;
1065 static uintmax_t
1066 powm2 (uintmax_t *r1m,
1067 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1068 uintmax_t ni, const uintmax_t *one)
1070 uintmax_t r1, r0, b1, b0, n1, n0;
1071 int i;
1072 uintmax_t e;
1074 b0 = bp[0];
1075 b1 = bp[1];
1076 n0 = np[0];
1077 n1 = np[1];
1079 r0 = one[0];
1080 r1 = one[1];
1082 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1084 if (e & 1)
1086 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1087 r1 = *r1m;
1089 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1090 b1 = *r1m;
1092 for (e = ep[1]; e > 0; e >>= 1)
1094 if (e & 1)
1096 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1097 r1 = *r1m;
1099 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1100 b1 = *r1m;
1102 *r1m = r1;
1103 return r0;
1106 ATTRIBUTE_CONST
1107 static bool
1108 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1109 int k, uintmax_t one)
1111 uintmax_t y = powm (b, q, n, ni, one);
1113 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1115 if (y == one || y == nm1)
1116 return true;
1118 for (int i = 1; i < k; i++)
1120 y = mulredc (y, y, n, ni);
1122 if (y == nm1)
1123 return true;
1124 if (y == one)
1125 return false;
1127 return false;
1130 ATTRIBUTE_PURE static bool
1131 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1132 const uintmax_t *qp, int k, const uintmax_t *one)
1134 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1136 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1137 y1 = r1m;
1139 if (y0 == one[0] && y1 == one[1])
1140 return true;
1142 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1144 if (y0 == nm1_0 && y1 == nm1_1)
1145 return true;
1147 for (int i = 1; i < k; i++)
1149 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1150 y1 = r1m;
1152 if (y0 == nm1_0 && y1 == nm1_1)
1153 return true;
1154 if (y0 == one[0] && y1 == one[1])
1155 return false;
1157 return false;
1160 static bool
1161 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1162 mpz_srcptr q, mp_bitcnt_t k)
1164 mpz_powm (y, x, q, n);
1166 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1167 return true;
1169 for (mp_bitcnt_t i = 1; i < k; i++)
1171 mpz_powm_ui (y, y, 2, n);
1172 if (mpz_cmp (y, nm1) == 0)
1173 return true;
1174 if (mpz_cmp_ui (y, 1) == 0)
1175 return false;
1177 return false;
1180 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1181 have been observed. The average seem to be about 2. */
1182 static bool ATTRIBUTE_PURE
1183 prime_p (uintmax_t n)
1185 bool is_prime;
1186 uintmax_t a_prim, one, ni;
1187 struct factors factors;
1189 if (n <= 1)
1190 return false;
1192 /* We have already cast out small primes. */
1193 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1194 return true;
1196 /* Precomputation for Miller-Rabin. */
1197 int k = stdc_trailing_zeros (n - 1);
1198 uintmax_t q = (n - 1) >> k;
1200 uintmax_t a = 2;
1201 binv (ni, n); /* ni <- 1/n mod B */
1202 redcify (one, 1, n);
1203 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1205 /* Perform a Miller-Rabin test, finds most composites quickly. */
1206 if (!millerrabin (n, ni, a_prim, q, k, one))
1207 return false;
1209 if (flag_prove_primality)
1211 /* Factor n-1 for Lucas. */
1212 factor (0, n - 1, &factors);
1215 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1216 number composite. */
1217 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1219 if (flag_prove_primality)
1221 is_prime = true;
1222 for (int i = 0; i < factors.nfactors && is_prime; i++)
1224 is_prime
1225 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1228 else
1230 /* After enough Miller-Rabin runs, be content. */
1231 is_prime = (r == MR_REPS - 1);
1234 if (is_prime)
1235 return true;
1237 a += primes_diff[r]; /* Establish new base. */
1239 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1240 on most processors, since it avoids udiv_qrnnd. If we go down the
1241 udiv_qrnnd_preinv path, this code should be replaced. */
1243 uintmax_t s1, s0;
1244 umul_ppmm (s1, s0, one, a);
1245 if (LIKELY (s1 == 0))
1246 a_prim = s0 % n;
1247 else
1249 MAYBE_UNUSED uintmax_t dummy;
1250 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1254 if (!millerrabin (n, ni, a_prim, q, k, one))
1255 return false;
1258 affirm (!"Lucas prime test failure. This should not happen");
1261 static bool ATTRIBUTE_PURE
1262 prime2_p (uintmax_t n1, uintmax_t n0)
1264 uintmax_t q[2], nm1[2];
1265 uintmax_t a_prim[2];
1266 uintmax_t one[2];
1267 uintmax_t na[2];
1268 uintmax_t ni;
1269 int k;
1270 struct factors factors;
1272 if (n1 == 0)
1273 return prime_p (n0);
1275 nm1[1] = n1 - (n0 == 0);
1276 nm1[0] = n0 - 1;
1277 if (nm1[0] == 0)
1279 assume (nm1[1]);
1280 k = stdc_trailing_zeros (nm1[1]);
1282 q[0] = nm1[1] >> k;
1283 q[1] = 0;
1284 k += W_TYPE_SIZE;
1286 else
1288 k = stdc_trailing_zeros (nm1[0]);
1289 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1292 uintmax_t a = 2;
1293 binv (ni, n0);
1294 redcify2 (one[1], one[0], 1, n1, n0);
1295 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1297 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1298 na[0] = n0;
1299 na[1] = n1;
1301 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1302 return false;
1304 if (flag_prove_primality)
1306 /* Factor n-1 for Lucas. */
1307 factor (nm1[1], nm1[0], &factors);
1310 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1311 number composite. */
1312 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1314 bool is_prime;
1315 uintmax_t e[2], y[2];
1317 if (flag_prove_primality)
1319 is_prime = true;
1320 if (hi (factors.plarge))
1322 uintmax_t pi;
1323 binv (pi, lo (factors.plarge));
1324 e[0] = pi * nm1[0];
1325 e[1] = 0;
1326 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1327 is_prime = (y[0] != one[0] || y[1] != one[1]);
1329 for (int i = 0; i < factors.nfactors && is_prime; i++)
1331 /* FIXME: We always have the factor 2. Do we really need to
1332 handle it here? We have done the same powering as part
1333 of millerrabin. */
1334 if (factors.p[i] == 2)
1335 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1336 else
1337 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1338 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1339 is_prime = (y[0] != one[0] || y[1] != one[1]);
1342 else
1344 /* After enough Miller-Rabin runs, be content. */
1345 is_prime = (r == MR_REPS - 1);
1348 if (is_prime)
1349 return true;
1351 a += primes_diff[r]; /* Establish new base. */
1352 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1354 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1355 return false;
1358 affirm (!"Lucas prime test failure. This should not happen");
1361 static bool
1362 mp_prime_p (mpz_t n)
1364 bool is_prime;
1365 mpz_t q, a, nm1, tmp;
1366 struct mp_factors factors;
1368 if (mpz_cmp_ui (n, 1) <= 0)
1369 return false;
1371 /* We have already cast out small primes. */
1372 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1373 return true;
1375 mpz_inits (q, a, nm1, tmp, nullptr);
1377 /* Precomputation for Miller-Rabin. */
1378 mpz_sub_ui (nm1, n, 1);
1380 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1381 mp_bitcnt_t k = mpz_scan1 (nm1, 0);
1382 mpz_tdiv_q_2exp (q, nm1, k);
1384 mpz_set_ui (a, 2);
1386 /* Perform a Miller-Rabin test, finds most composites quickly. */
1387 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1389 is_prime = false;
1390 goto ret2;
1393 if (flag_prove_primality)
1395 /* Factor n-1 for Lucas. */
1396 mpz_set (tmp, nm1);
1397 mp_factor (tmp, &factors);
1400 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1401 number composite. */
1402 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1404 if (flag_prove_primality)
1406 is_prime = true;
1407 for (idx_t i = 0; i < factors.nfactors && is_prime; i++)
1409 mpz_divexact (tmp, nm1, factors.p[i]);
1410 mpz_powm (tmp, a, tmp, n);
1411 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1414 else
1416 /* After enough Miller-Rabin runs, be content. */
1417 is_prime = (r == MR_REPS - 1);
1420 if (is_prime)
1421 goto ret1;
1423 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1425 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1427 is_prime = false;
1428 goto ret1;
1432 affirm (!"Lucas prime test failure. This should not happen");
1434 ret1:
1435 if (flag_prove_primality)
1436 mp_factor_clear (&factors);
1437 ret2:
1438 mpz_clears (q, a, nm1, tmp, nullptr);
1440 return is_prime;
1443 static void
1444 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1445 struct factors *factors)
1447 uintmax_t x, z, y, P, t, ni, g;
1449 unsigned long int k = 1;
1450 unsigned long int l = 1;
1452 redcify (P, 1, n);
1453 addmod (x, P, P, n); /* i.e., redcify(2) */
1454 y = z = x;
1456 while (n != 1)
1458 affirm (a < n);
1460 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1462 for (;;)
1466 x = mulredc (x, x, n, ni);
1467 addmod (x, x, a, n);
1469 submod (t, z, x, n);
1470 P = mulredc (P, t, n, ni);
1472 if (k % 32 == 1)
1474 if (gcd_odd (P, n) != 1)
1475 goto factor_found;
1476 y = x;
1479 while (--k != 0);
1481 z = x;
1482 k = l;
1483 l = 2 * l;
1484 for (unsigned long int i = 0; i < k; i++)
1486 x = mulredc (x, x, n, ni);
1487 addmod (x, x, a, n);
1489 y = x;
1492 factor_found:
1495 y = mulredc (y, y, n, ni);
1496 addmod (y, y, a, n);
1498 submod (t, z, y, n);
1499 g = gcd_odd (t, n);
1501 while (g == 1);
1503 if (n == g)
1505 /* Found n itself as factor. Restart with different params. */
1506 factor_using_pollard_rho (n, a + 1, factors);
1507 return;
1510 n = n / g;
1512 if (!prime_p (g))
1513 factor_using_pollard_rho (g, a + 1, factors);
1514 else
1515 factor_insert (factors, g);
1517 if (prime_p (n))
1519 factor_insert (factors, n);
1520 break;
1523 x = x % n;
1524 z = z % n;
1525 y = y % n;
1529 static void
1530 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1531 struct factors *factors)
1533 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1535 unsigned long int k = 1;
1536 unsigned long int l = 1;
1538 redcify2 (P1, P0, 1, n1, n0);
1539 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1540 y1 = z1 = x1;
1541 y0 = z0 = x0;
1543 while (n1 != 0 || n0 != 1)
1545 binv (ni, n0);
1547 for (;;)
1551 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1552 x1 = r1m;
1553 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1555 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1556 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1557 P1 = r1m;
1559 if (k % 32 == 1)
1561 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1562 if (g1 != 0 || g0 != 1)
1563 goto factor_found;
1564 y1 = x1; y0 = x0;
1567 while (--k != 0);
1569 z1 = x1; z0 = x0;
1570 k = l;
1571 l = 2 * l;
1572 for (unsigned long int i = 0; i < k; i++)
1574 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1575 x1 = r1m;
1576 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1578 y1 = x1; y0 = x0;
1581 factor_found:
1584 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1585 y1 = r1m;
1586 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1588 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1589 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1591 while (g1 == 0 && g0 == 1);
1593 if (g1 == 0)
1595 /* The found factor is one word, and > 1. */
1596 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1598 if (!prime_p (g0))
1599 factor_using_pollard_rho (g0, a + 1, factors);
1600 else
1601 factor_insert (factors, g0);
1603 else
1605 /* The found factor is two words. This is highly unlikely, thus hard
1606 to trigger. Please be careful before you change this code! */
1607 uintmax_t ginv;
1609 if (n1 == g1 && n0 == g0)
1611 /* Found n itself as factor. Restart with different params. */
1612 factor_using_pollard_rho2 (n1, n0, a + 1, factors);
1613 return;
1616 /* Compute n = n / g. Since the result will fit one word,
1617 we can compute the quotient modulo B, ignoring the high
1618 divisor word. */
1619 binv (ginv, g0);
1620 n0 = ginv * n0;
1621 n1 = 0;
1623 if (!prime2_p (g1, g0))
1624 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1625 else
1626 factor_insert_large (factors, g1, g0);
1629 if (n1 == 0)
1631 if (prime_p (n0))
1633 factor_insert (factors, n0);
1634 break;
1637 factor_using_pollard_rho (n0, a, factors);
1638 return;
1641 if (prime2_p (n1, n0))
1643 factor_insert_large (factors, n1, n0);
1644 break;
1647 uuset (&x1, &x0, mod2 (x1, x0, n1, n0));
1648 uuset (&z1, &z0, mod2 (z1, z0, n1, n0));
1649 uuset (&y1, &y0, mod2 (y1, y0, n1, n0));
1653 static void
1654 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1655 struct mp_factors *factors)
1657 mpz_t x, z, y, P;
1658 mpz_t t, t2;
1660 devmsg ("[pollard-rho (%lu)] ", a);
1662 mpz_inits (t, t2, nullptr);
1663 mpz_init_set_si (y, 2);
1664 mpz_init_set_si (x, 2);
1665 mpz_init_set_si (z, 2);
1666 mpz_init_set_ui (P, 1);
1668 unsigned long long int k = 1;
1669 unsigned long long int l = 1;
1671 while (mpz_cmp_ui (n, 1) != 0)
1673 for (;;)
1677 mpz_mul (t, x, x);
1678 mpz_mod (x, t, n);
1679 mpz_add_ui (x, x, a);
1681 mpz_sub (t, z, x);
1682 mpz_mul (t2, P, t);
1683 mpz_mod (P, t2, n);
1685 if (k % 32 == 1)
1687 mpz_gcd (t, P, n);
1688 if (mpz_cmp_ui (t, 1) != 0)
1689 goto factor_found;
1690 mpz_set (y, x);
1693 while (--k != 0);
1695 mpz_set (z, x);
1696 k = l;
1697 l = 2 * l;
1698 for (unsigned long long int i = 0; i < k; i++)
1700 mpz_mul (t, x, x);
1701 mpz_mod (x, t, n);
1702 mpz_add_ui (x, x, a);
1704 mpz_set (y, x);
1707 factor_found:
1710 mpz_mul (t, y, y);
1711 mpz_mod (y, t, n);
1712 mpz_add_ui (y, y, a);
1714 mpz_sub (t, z, y);
1715 mpz_gcd (t, t, n);
1717 while (mpz_cmp_ui (t, 1) == 0);
1719 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1721 if (!mp_prime_p (t))
1723 devmsg ("[composite factor--restarting pollard-rho] ");
1724 mp_factor_using_pollard_rho (t, a + 1, factors);
1726 else
1728 mp_factor_insert (factors, t);
1731 if (mp_prime_p (n))
1733 mp_factor_insert (factors, n);
1734 break;
1737 mpz_mod (x, x, n);
1738 mpz_mod (z, z, n);
1739 mpz_mod (y, y, n);
1742 mpz_clears (P, t2, t, z, x, y, nullptr);
1745 #if USE_SQUFOF
1746 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1747 algorithm is replaced, consider also returning the remainder. */
1748 ATTRIBUTE_CONST
1749 static uintmax_t
1750 isqrt (uintmax_t n)
1752 if (n == 0)
1753 return 0;
1755 int c = stdc_leading_zeros (n);
1757 /* Make x > sqrt(n). This will be invariant through the loop. */
1758 uintmax_t x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) >> 1);
1760 for (;;)
1762 uintmax_t y = (x + n / x) / 2;
1763 if (y >= x)
1764 return x;
1766 x = y;
1770 ATTRIBUTE_CONST
1771 static uintmax_t
1772 isqrt2 (uintmax_t nh, uintmax_t nl)
1774 /* Ensures the remainder fits in an uintmax_t. */
1775 affirm (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1777 if (nh == 0)
1778 return isqrt (nl);
1780 int shift = stdc_leading_zeros (nh) & ~1;
1782 /* Make x > sqrt (n). */
1783 uintmax_t x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1784 x <<= (W_TYPE_SIZE - shift) >> 1;
1786 /* Do we need more than one iteration? */
1787 for (;;)
1789 MAYBE_UNUSED uintmax_t r;
1790 uintmax_t q, y;
1791 udiv_qrnnd (q, r, nh, nl, x);
1792 y = (x + q) / 2;
1794 if (y >= x)
1796 uintmax_t hi, lo;
1797 umul_ppmm (hi, lo, x + 1, x + 1);
1798 affirm (gt2 (hi, lo, nh, nl));
1800 umul_ppmm (hi, lo, x, x);
1801 affirm (ge2 (nh, nl, hi, lo));
1802 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1803 affirm (hi == 0);
1805 return x;
1808 x = y;
1812 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1813 # define MAGIC64 0x0202021202030213ULL
1814 # define MAGIC63 0x0402483012450293ULL
1815 # define MAGIC65 0x218a019866014613ULL
1816 # define MAGIC11 0x23b
1818 /* Return the square root if the input is a square, otherwise 0. */
1819 ATTRIBUTE_CONST
1820 static uintmax_t
1821 is_square (uintmax_t x)
1823 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1824 computing the square root. */
1825 if (((MAGIC64 >> (x & 63)) & 1)
1826 && ((MAGIC63 >> (x % 63)) & 1)
1827 /* Both 0 and 64 are squares mod (65). */
1828 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1829 && ((MAGIC11 >> (x % 11) & 1)))
1831 uintmax_t r = isqrt (x);
1832 if (r * r == x)
1833 return r;
1835 return 0;
1838 /* invtab[i] = floor (0x10000 / (0x100 + i) */
1839 static short const invtab[0x81] =
1841 0x200,
1842 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1843 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1844 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1845 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1846 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1847 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1848 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1849 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1850 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1851 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1852 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1853 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1854 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1855 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1856 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1857 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1860 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1861 that q < 0x40; here it instead uses a table of (Euclidean) inverses. */
1862 # define div_smallq(q, r, u, d) \
1863 do { \
1864 if ((u) / 0x40 < (d)) \
1866 uintmax_t _dinv, _mask, _q, _r; \
1867 int _cnt = stdc_leading_zeros (d); \
1868 _r = (u); \
1869 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1871 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1872 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1874 else \
1876 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1877 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1879 _r -= _q * (d); \
1881 _mask = -(uintmax_t) (_r >= (d)); \
1882 (r) = _r - (_mask & (d)); \
1883 (q) = _q - _mask; \
1884 affirm ((q) * (d) + (r) == u); \
1886 else \
1888 uintmax_t _q = (u) / (d); \
1889 (r) = (u) - _q * (d); \
1890 (q) = _q; \
1892 } while (0)
1894 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1895 = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025),
1896 with 3025 = 55^2.
1898 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1899 representing G_0 = (-55, 2 * 4652, 8653).
1901 In the notation of the paper:
1903 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1907 t_0 = floor([q_0 + R_0] / S0) = 1
1908 R_1 = t_0 * S_0 - R_0 = 4001
1909 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1912 /* Multipliers, in order of efficiency:
1913 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1914 0.7317 3*5*7 = 105 = 1
1915 0.7820 3*5*11 = 165 = 1
1916 0.7872 3*5 = 15 = 3
1917 0.8101 3*7*11 = 231 = 3
1918 0.8155 3*7 = 21 = 1
1919 0.8284 5*7*11 = 385 = 1
1920 0.8339 5*7 = 35 = 3
1921 0.8716 3*11 = 33 = 1
1922 0.8774 3 = 3 = 3
1923 0.8913 5*11 = 55 = 3
1924 0.8972 5 = 5 = 1
1925 0.9233 7*11 = 77 = 1
1926 0.9295 7 = 7 = 3
1927 0.9934 11 = 11 = 3
1929 # define QUEUE_SIZE 50
1930 #endif
1932 #if STAT_SQUFOF
1933 # define Q_FREQ_SIZE 50
1934 /* Element 0 keeps the total */
1935 static int q_freq[Q_FREQ_SIZE + 1];
1936 #endif
1938 #if USE_SQUFOF
1939 /* Return true on success. Expected to fail only for numbers
1940 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1941 static bool
1942 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1944 /* Uses algorithm and notation from
1946 SQUARE FORM FACTORIZATION
1947 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1949 https://homes.cerias.purdue.edu/~ssw/squfof.pdf
1952 static short const multipliers_1[] =
1953 { /* = 1 (mod 4) */
1954 105, 165, 21, 385, 33, 5, 77, 1, 0
1956 static short const multipliers_3[] =
1957 { /* = 3 (mod 4) */
1958 1155, 15, 231, 35, 3, 55, 7, 11, 0
1961 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1963 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1964 return false;
1966 uintmax_t sqrt_n = isqrt2 (n1, n0);
1968 if (n0 == sqrt_n * sqrt_n)
1970 uintmax_t p1, p0;
1972 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1973 affirm (p0 == n0);
1975 if (n1 == p1)
1977 if (prime_p (sqrt_n))
1978 factor_insert_multiplicity (factors, sqrt_n, 2);
1979 else
1981 struct factors f;
1983 f.nfactors = 0;
1984 if (!factor_using_squfof (0, sqrt_n, &f))
1986 /* Try pollard rho instead */
1987 factor_using_pollard_rho (sqrt_n, 1, &f);
1989 /* Duplicate the new factors */
1990 for (unsigned int i = 0; i < f.nfactors; i++)
1991 factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]);
1993 return true;
1997 /* Select multipliers so we always get n * mu = 3 (mod 4) */
1998 for (short const *m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
1999 *m; m++)
2001 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2002 unsigned int i;
2003 unsigned int mu = *m;
2004 int qpos = 0;
2006 affirm (mu * n0 % 4 == 3);
2008 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2009 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2010 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2011 mu^3 < n.
2013 However, this seems insufficient: With n = 37243139 and mu =
2014 105, we get a trivial factor, from the square 38809 = 197^2,
2015 without any corresponding Q earlier in the iteration.
2017 Requiring 64 mu^3 < n seems sufficient. */
2018 if (n1 == 0)
2020 if ((uintmax_t) mu * mu * mu >= n0 / 64)
2021 continue;
2023 else
2025 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2026 continue;
2028 umul_ppmm (Dh, Dl, n0, mu);
2029 Dh += n1 * mu;
2031 affirm (Dl % 4 != 1);
2032 affirm (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2034 S = isqrt2 (Dh, Dl);
2036 Q1 = 1;
2037 P = S;
2039 /* Square root remainder fits in one word, so ignore high part. */
2040 Q = Dl - P * P;
2041 /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))? */
2042 L = isqrt (2 * S);
2043 B = 2 * L;
2044 L1 = mu * 2 * L;
2046 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2047 4 D. */
2049 for (i = 0; i <= B; i++)
2051 uintmax_t q, P1, t, rem;
2053 div_smallq (q, rem, S + P, Q);
2054 P1 = S - rem; /* P1 = q*Q - P */
2056 affirm (q > 0 && Q > 0);
2058 # if STAT_SQUFOF
2059 q_freq[0]++;
2060 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2061 # endif
2063 if (Q <= L1)
2065 uintmax_t g = Q;
2067 if ((Q & 1) == 0)
2068 g /= 2;
2070 g /= gcd_odd (g, mu);
2072 if (g <= L)
2074 if (qpos >= QUEUE_SIZE)
2075 error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2076 queue[qpos].Q = g;
2077 queue[qpos].P = P % g;
2078 qpos++;
2082 /* I think the difference can be either sign, but mod
2083 2^W_TYPE_SIZE arithmetic should be fine. */
2084 t = Q1 + q * (P - P1);
2085 Q1 = Q;
2086 Q = t;
2087 P = P1;
2089 if ((i & 1) == 0)
2091 uintmax_t r = is_square (Q);
2092 if (r)
2094 for (int j = 0; j < qpos; j++)
2096 if (queue[j].Q == r)
2098 if (r == 1)
2099 /* Traversed entire cycle. */
2100 goto next_multiplier;
2102 /* Need the absolute value for divisibility test. */
2103 if (P >= queue[j].P)
2104 t = P - queue[j].P;
2105 else
2106 t = queue[j].P - P;
2107 if (t % r == 0)
2109 /* Delete entries up to and including entry
2110 j, which matched. */
2111 memmove (queue, queue + j + 1,
2112 (qpos - j - 1) * sizeof (queue[0]));
2113 qpos -= (j + 1);
2115 goto next_i;
2119 /* We have found a square form, which should give a
2120 factor. */
2121 Q1 = r;
2122 affirm (S >= P); /* What signs are possible? */
2123 P += r * ((S - P) / r);
2125 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2126 for the case D = 2N. */
2127 /* Compute Q = (D - P*P) / Q1, but we need double
2128 precision. */
2129 uintmax_t hi, lo;
2130 umul_ppmm (hi, lo, P, P);
2131 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2132 udiv_qrnnd (Q, rem, hi, lo, Q1);
2133 affirm (rem == 0);
2135 for (;;)
2137 /* Note: There appears to by a typo in the paper,
2138 Step 4a in the algorithm description says q <--
2139 floor([S+P]/\hat Q), but looking at the equations
2140 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2141 (In this code, \hat Q is Q1). */
2142 div_smallq (q, rem, S + P, Q);
2143 P1 = S - rem; /* P1 = q*Q - P */
2145 # if STAT_SQUFOF
2146 q_freq[0]++;
2147 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2148 # endif
2149 if (P == P1)
2150 break;
2151 t = Q1 + q * (P - P1);
2152 Q1 = Q;
2153 Q = t;
2154 P = P1;
2157 if ((Q & 1) == 0)
2158 Q /= 2;
2159 Q /= gcd_odd (Q, mu);
2161 affirm (Q > 1 && (n1 || Q < n0));
2163 if (prime_p (Q))
2164 factor_insert (factors, Q);
2165 else if (!factor_using_squfof (0, Q, factors))
2166 factor_using_pollard_rho (Q, 2, factors);
2168 divexact_21 (n1, n0, n1, n0, Q);
2170 if (prime2_p (n1, n0))
2171 factor_insert_large (factors, n1, n0);
2172 else
2174 if (!factor_using_squfof (n1, n0, factors))
2176 if (n1 == 0)
2177 factor_using_pollard_rho (n0, 1, factors);
2178 else
2179 factor_using_pollard_rho2 (n1, n0, 1, factors);
2183 return true;
2186 next_i:;
2188 next_multiplier:;
2190 return false;
2192 #endif
2194 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2195 results in FACTORS. */
2196 static void
2197 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2199 factors->nfactors = 0;
2200 hiset (&factors->plarge, 0);
2202 if (t1 == 0 && t0 < 2)
2203 return;
2205 t0 = factor_using_division (&t1, t1, t0, factors);
2207 if (t1 == 0 && t0 < 2)
2208 return;
2210 if (prime2_p (t1, t0))
2211 factor_insert_large (factors, t1, t0);
2212 else
2214 #if USE_SQUFOF
2215 if (factor_using_squfof (t1, t0, factors))
2216 return;
2217 #endif
2219 if (t1 == 0)
2220 factor_using_pollard_rho (t0, 1, factors);
2221 else
2222 factor_using_pollard_rho2 (t1, t0, 1, factors);
2226 /* Use Pollard-rho to compute the prime factors of
2227 arbitrary-precision T, and put the results in FACTORS. */
2228 static void
2229 mp_factor (mpz_t t, struct mp_factors *factors)
2231 mp_factor_init (factors);
2233 if (mpz_sgn (t) != 0)
2235 mp_factor_using_division (t, factors);
2237 if (mpz_cmp_ui (t, 1) != 0)
2239 devmsg ("[is number prime?] ");
2240 if (mp_prime_p (t))
2241 mp_factor_insert (factors, t);
2242 else
2243 mp_factor_using_pollard_rho (t, 1, factors);
2248 static strtol_error
2249 strto2uintmax (uintmax_t *hip, uintmax_t *lop, char const *s)
2251 int lo_carry;
2252 uintmax_t hi = 0, lo = 0;
2254 strtol_error err = LONGINT_INVALID;
2256 /* Initial scan for invalid digits. */
2257 char const *p = s;
2258 for (;;)
2260 unsigned char c = *p++;
2261 if (c == 0)
2262 break;
2264 if (UNLIKELY (!ISDIGIT (c)))
2266 err = LONGINT_INVALID;
2267 break;
2270 err = LONGINT_OK; /* we've seen at least one valid digit */
2273 while (err == LONGINT_OK)
2275 unsigned char c = *s++;
2276 if (c == 0)
2277 break;
2279 c -= '0';
2281 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2283 err = LONGINT_OVERFLOW;
2284 break;
2286 hi = 10 * hi;
2288 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2289 lo_carry += 10 * lo < 2 * lo;
2291 lo = 10 * lo;
2292 lo += c;
2294 lo_carry += lo < c;
2295 hi += lo_carry;
2296 if (UNLIKELY (hi < lo_carry))
2298 err = LONGINT_OVERFLOW;
2299 break;
2303 *hip = hi;
2304 *lop = lo;
2306 return err;
2309 /* Structure and routines for buffering and outputting full lines,
2310 to support parallel operation efficiently. */
2311 static struct lbuf_
2313 char *buf;
2314 char *end;
2315 } lbuf;
2317 /* 512 is chosen to give good performance,
2318 and also is the max guaranteed size that
2319 consumers can read atomically through pipes.
2320 Also it's big enough to cater for max line length
2321 even with 128 bit uintmax_t. */
2322 #define FACTOR_PIPE_BUF 512
2324 static void
2325 lbuf_alloc (void)
2327 if (lbuf.buf)
2328 return;
2330 /* Double to ensure enough space for
2331 previous numbers + next number. */
2332 lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2);
2333 lbuf.end = lbuf.buf;
2336 /* Write complete LBUF to standard output. */
2337 static void
2338 lbuf_flush (void)
2340 size_t size = lbuf.end - lbuf.buf;
2341 if (full_write (STDOUT_FILENO, lbuf.buf, size) != size)
2342 write_error ();
2343 lbuf.end = lbuf.buf;
2346 /* Add a character C to LBUF and if it's a newline
2347 and enough bytes are already buffered,
2348 then write atomically to standard output. */
2349 static void
2350 lbuf_putc (char c)
2352 *lbuf.end++ = c;
2354 if (c == '\n')
2356 size_t buffered = lbuf.end - lbuf.buf;
2358 /* Provide immediate output for interactive use. */
2359 static int line_buffered = -1;
2360 if (line_buffered == -1)
2361 line_buffered = isatty (STDIN_FILENO) || isatty (STDOUT_FILENO);
2362 if (line_buffered)
2363 lbuf_flush ();
2364 else if (buffered >= FACTOR_PIPE_BUF)
2366 /* Write output in <= PIPE_BUF chunks
2367 so consumers can read atomically. */
2368 char const *tend = lbuf.end;
2370 /* Since a umaxint_t's factors must fit in 512
2371 we're guaranteed to find a newline here. */
2372 char *tlend = lbuf.buf + FACTOR_PIPE_BUF;
2373 while (*--tlend != '\n');
2374 tlend++;
2376 lbuf.end = tlend;
2377 lbuf_flush ();
2379 /* Buffer the remainder. */
2380 memcpy (lbuf.buf, tlend, tend - tlend);
2381 lbuf.end = lbuf.buf + (tend - tlend);
2386 /* Buffer an int to the internal LBUF. */
2387 static void
2388 lbuf_putint (uintmax_t i, size_t min_width)
2390 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2391 char const *umaxstr = umaxtostr (i, buf);
2392 size_t width = sizeof (buf) - (umaxstr - buf) - 1;
2393 size_t z = width;
2395 for (; z < min_width; z++)
2396 *lbuf.end++ = '0';
2398 memcpy (lbuf.end, umaxstr, width);
2399 lbuf.end += width;
2402 static void
2403 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2405 uintmax_t q, r;
2407 if (t1 == 0)
2408 lbuf_putint (t0, 0);
2409 else
2411 /* Use very plain code here since it seems hard to write fast code
2412 without assuming a specific word size. */
2413 q = t1 / 1000000000;
2414 r = t1 % 1000000000;
2415 udiv_qrnnd (t0, r, r, t0, 1000000000);
2416 print_uintmaxes (q, t0);
2417 lbuf_putint (r, 9);
2421 /* Single-precision factoring */
2422 static void
2423 print_factors_single (uintmax_t t1, uintmax_t t0)
2425 struct factors factors;
2427 print_uintmaxes (t1, t0);
2428 lbuf_putc (':');
2430 factor (t1, t0, &factors);
2432 for (int j = 0; j < factors.nfactors; j++)
2433 for (int k = 0; k < factors.e[j]; k++)
2435 lbuf_putc (' ');
2436 print_uintmaxes (0, factors.p[j]);
2437 if (print_exponents && factors.e[j] > 1)
2439 lbuf_putc ('^');
2440 lbuf_putint (factors.e[j], 0);
2441 break;
2445 if (hi (factors.plarge))
2447 lbuf_putc (' ');
2448 print_uintmaxes (hi (factors.plarge), lo (factors.plarge));
2451 lbuf_putc ('\n');
2454 /* Emit the factors of the indicated number. If we have the option of using
2455 either algorithm, we select on the basis of the length of the number.
2456 For longer numbers, we prefer the MP algorithm even if the native algorithm
2457 has enough digits, because the algorithm is better. The turnover point
2458 depends on the value. */
2459 static bool
2460 print_factors (char const *input)
2462 /* Skip initial spaces and '+'. */
2463 char const *str = input;
2464 while (*str == ' ')
2465 str++;
2466 str += *str == '+';
2468 uintmax_t t1, t0;
2470 /* Try converting the number to one or two words. If it fails, use GMP or
2471 print an error message. The 2nd condition checks that the most
2472 significant bit of the two-word number is clear, in a typesize neutral
2473 way. */
2474 strtol_error err = strto2uintmax (&t1, &t0, str);
2476 switch (err)
2478 case LONGINT_OK:
2479 if (((t1 << 1) >> 1) == t1)
2481 devmsg ("[using single-precision arithmetic] ");
2482 print_factors_single (t1, t0);
2483 return true;
2485 break;
2487 case LONGINT_OVERFLOW:
2488 /* Try GMP. */
2489 break;
2491 default:
2492 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2493 return false;
2496 devmsg ("[using arbitrary-precision arithmetic] ");
2497 mpz_t t;
2498 struct mp_factors factors;
2500 mpz_init_set_str (t, str, 10);
2502 mpz_out_str (stdout, 10, t);
2503 putchar (':');
2504 mp_factor (t, &factors);
2506 for (idx_t j = 0; j < factors.nfactors; j++)
2507 for (unsigned long int k = 0; k < factors.e[j]; k++)
2509 putchar (' ');
2510 mpz_out_str (stdout, 10, factors.p[j]);
2511 if (print_exponents && factors.e[j] > 1)
2513 printf ("^%lu", factors.e[j]);
2514 break;
2518 mp_factor_clear (&factors);
2519 mpz_clear (t);
2520 putchar ('\n');
2521 fflush (stdout);
2522 return true;
2525 void
2526 usage (int status)
2528 if (status != EXIT_SUCCESS)
2529 emit_try_help ();
2530 else
2532 printf (_("\
2533 Usage: %s [OPTION] [NUMBER]...\n\
2535 program_name);
2536 fputs (_("\
2537 Print the prime factors of each specified integer NUMBER. If none\n\
2538 are specified on the command line, read them from standard input.\n\
2540 "), stdout);
2541 fputs ("\
2542 -h, --exponents print repeated factors in form p^e unless e is 1\n\
2543 ", stdout);
2544 fputs (HELP_OPTION_DESCRIPTION, stdout);
2545 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2546 emit_ancillary_info (PROGRAM_NAME);
2548 exit (status);
2551 static bool
2552 do_stdin (void)
2554 bool ok = true;
2555 token_buffer tokenbuffer;
2557 init_tokenbuffer (&tokenbuffer);
2559 while (true)
2561 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2562 &tokenbuffer);
2563 if (token_length == (size_t) -1)
2565 if (ferror (stdin))
2566 error (EXIT_FAILURE, errno, _("error reading input"));
2567 break;
2570 ok &= print_factors (tokenbuffer.buffer);
2572 free (tokenbuffer.buffer);
2574 return ok;
2578 main (int argc, char **argv)
2580 initialize_main (&argc, &argv);
2581 set_program_name (argv[0]);
2582 setlocale (LC_ALL, "");
2583 bindtextdomain (PACKAGE, LOCALEDIR);
2584 textdomain (PACKAGE);
2586 lbuf_alloc ();
2587 atexit (close_stdout);
2588 atexit (lbuf_flush);
2590 int c;
2591 while ((c = getopt_long (argc, argv, "h", long_options, nullptr)) != -1)
2593 switch (c)
2595 case 'h': /* NetBSD used -h for this functionality first. */
2596 print_exponents = true;
2597 break;
2599 case DEV_DEBUG_OPTION:
2600 dev_debug = true;
2601 break;
2603 case_GETOPT_HELP_CHAR;
2605 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2607 default:
2608 usage (EXIT_FAILURE);
2612 #if STAT_SQUFOF
2613 memset (q_freq, 0, sizeof (q_freq));
2614 #endif
2616 bool ok;
2617 if (argc <= optind)
2618 ok = do_stdin ();
2619 else
2621 ok = true;
2622 for (int i = optind; i < argc; i++)
2623 if (! print_factors (argv[i]))
2624 ok = false;
2627 #if STAT_SQUFOF
2628 if (q_freq[0] > 0)
2630 double acc_f;
2631 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2632 for (int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2634 double f = (double) q_freq[i] / q_freq[0];
2635 acc_f += f;
2636 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2637 100.0 * f, 100.0 * acc_f);
2640 #endif
2642 return ok ? EXIT_SUCCESS : EXIT_FAILURE;