2 * zmod - modulo arithmetic routines
4 * Copyright (C) 1999-2007 David I. Bell, Landon Curt Noll and Ernest Bowen
6 * Primary author: David I. Bell
8 * Calc is open software; you can redistribute it and/or modify it under
9 * the terms of the version 2.1 of the GNU Lesser General Public License
10 * as published by the Free Software Foundation.
12 * Calc is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
15 * Public License for more details.
17 * A copy of version 2.1 of the GNU Lesser General Public License is
18 * distributed with calc under the filename COPYING-LGPL. You should have
19 * received a copy with calc; if not, write to Free Software Foundation, Inc.
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * @(#) $Revision: 30.1 $
23 * @(#) $Id: zmod.c,v 30.1 2007/03/16 11:09:46 chongo Exp $
24 * @(#) $Source: /usr/local/src/bin/calc/RCS/zmod.c,v $
26 * Under source code control: 1991/05/22 23:03:55
27 * File existed as early as: 1991
29 * Share and enjoy! :-) http://www.isthe.com/chongo/tech/comp/calc/
33 * Routines to do modulo arithmetic both normally and also using the REDC
34 * algorithm given by Peter L. Montgomery in Mathematics of Computation,
35 * volume 44, number 170 (April, 1985). For multiple multiplies using
36 * the same large modulus, the REDC algorithm avoids the usual division
37 * by the modulus, instead replacing it with two multiplies or else a
38 * special algorithm. When these two multiplies or the special algorithm
39 * are faster then the division, then the REDC algorithm is better.
47 #define POWBITS 4 /* bits for power chunks (must divide BASEB) */
48 #define POWNUMS (1<<POWBITS) /* number of powers needed in table */
50 S_FUNC
void zmod5(ZVALUE
*zp
);
51 S_FUNC
void zmod6(ZVALUE z1
, ZVALUE
*res
);
52 S_FUNC
void zredcmodinv(ZVALUE z1
, ZVALUE
*res
);
54 STATIC REDC
*powermodredc
= NULL
; /* REDC info for raising to power */
56 BOOL havelastmod
= FALSE
;
57 STATIC ZVALUE lastmod
[1];
58 STATIC ZVALUE lastmodinv
[1];
62 * Square a number and then mod the result with a second number.
63 * The number to be squared can be negative or out of modulo range.
64 * The result will be in the range 0 to the modulus - 1.
67 * z1 number to be squared
68 * z2 number to take mod with
72 zsquaremod(ZVALUE z1
, ZVALUE z2
, ZVALUE
*res
)
78 if (ziszero(z2
) || zisneg(z2
))
79 math_error("Mod of non-positive integer");
81 if (ziszero(z1
) || zisunit(z2
)) {
87 * If the modulus is a single digit number, then do the result
88 * cheaply. Check especially for a small power of two.
92 if ((digit
& -digit
) == digit
) { /* NEEDS 2'S COMP */
93 prod
= (FULL
) z1
.v
[0];
94 prod
= (prod
* prod
) & (digit
- 1);
97 prod
= (FULL
) zmodi(z1
, (long) digit
);
98 prod
= (prod
* prod
) % digit
;
100 itoz((long) prod
, res
);
105 * The modulus is more than one digit.
106 * Actually do the square and divide if necessary.
109 if ((tmp
.len
< z2
.len
) ||
110 ((tmp
.len
== z2
.len
) && (tmp
.v
[tmp
.len
-1] < z2
.v
[z2
.len
-1]))) {
114 zmod(tmp
, z2
, res
, 0);
120 * Calculate the number congruent to the given number whose absolute
121 * value is minimal. The number to be reduced can be negative or out of
122 * modulo range. The result will be within the range -int((modulus-1)/2)
123 * to int(modulus/2) inclusive. For example, for modulus 7, numbers are
124 * reduced to the range [-3, 3], and for modulus 8, numbers are reduced to
128 * z1 number to find minimum congruence of
129 * z2 number to take mod with
133 zminmod(ZVALUE z1
, ZVALUE z2
, ZVALUE
*res
)
139 if (ziszero(z2
) || zisneg(z2
)) {
140 math_error("Mod of non-positive integer");
143 if (ziszero(z1
) || zisunit(z2
)) {
156 * Do a quick check to see if the number is very small compared
157 * to the modulus. If so, then the result is obvious.
159 if (z1
.len
< z2
.len
- 1) {
165 * Now make sure the input number is within the modulo range.
166 * If not, then reduce it to be within range and make the
178 z1
.sign
= (BOOL
)sign
;
179 zmod(z1
, z2
, &tmp1
, 0);
180 if (tmp1
.len
< z2
.len
- 1) {
188 * Now calculate the difference of the modulus and the absolute
189 * value of the original number. Compare the original number with
190 * the difference, and return the one with the smallest absolute
191 * value, with the correct sign. If the two values are equal, then
192 * return the positive result.
194 zsub(z2
, tmp1
, &tmp2
);
195 cv
= zrel(tmp1
, tmp2
);
198 tmp1
.sign
= (BOOL
)sign
;
214 * Compare two numbers for equality modulo a third number.
215 * The two numbers to be compared can be negative or out of modulo range.
216 * Returns TRUE if the numbers are not congruent, and FALSE if they are
220 * z1 first number to be compared
221 * z2 second number to be compared
225 zcmpmod(ZVALUE z1
, ZVALUE z2
, ZVALUE z3
)
227 ZVALUE tmp1
, tmp2
, tmp3
;
232 if (zisneg(z3
) || ziszero(z3
)) {
233 math_error("Non-positive modulus in zcmpmod");
237 return (((z1
.v
[0] + z2
.v
[0]) & 0x1) != 0);
240 * If the two numbers are equal, then their mods are equal.
242 if ((z1
.sign
== z2
.sign
) && (z1
.len
== z2
.len
) &&
243 (z1
.v
[0] == z2
.v
[0]) && (zcmp(z1
, z2
) == 0))
247 * If both numbers are negative, then we can make them positive.
249 if (zisneg(z1
) && zisneg(z2
)) {
255 * For small negative numbers, make them positive before comparing.
256 * In any case, the resulting numbers are in tmp1 and tmp2.
261 digit
= z3
.v
[len
- 1];
263 if (zisneg(z1
) && ((z1
.len
< len
) ||
264 ((z1
.len
== len
) && (z1
.v
[z1
.len
- 1] < digit
))))
267 if (zisneg(z2
) && ((z2
.len
< len
) ||
268 ((z2
.len
== len
) && (z2
.v
[z2
.len
- 1] < digit
))))
272 * Now compare the two numbers for equality.
273 * If they are equal we are all done.
275 if (zcmp(tmp1
, tmp2
) == 0) {
284 * They are not identical. Now if both numbers are positive
285 * and less than the modulus, then they are definitely not equal.
287 if ((tmp1
.sign
== tmp2
.sign
) &&
288 ((tmp1
.len
< len
) || (zrel(tmp1
, z3
) < 0)) &&
289 ((tmp2
.len
< len
) || (zrel(tmp2
, z3
) < 0))) {
298 * Either one of the numbers is negative or is large.
299 * So do the standard thing and subtract the two numbers.
300 * Then they are equal if the result is 0 (mod z3).
302 zsub(tmp1
, tmp2
, &tmp3
);
309 * Compare the result with the modulus to see if it is equal to
310 * or less than the modulus. If so, we know the mod result.
324 * We are forced to actually do the division.
325 * The numbers are congruent if the result is zero.
327 zmod(tmp3
, z3
, &tmp1
, 0);
340 * Given the address of a positive integer whose word count does not
341 * exceed twice that of the modulus stored at lastmod, to evaluate and store
342 * at that address the value of the integer modulo the modulus.
356 if (zrel(*zp
, *lastmod
) < 0)
358 modlen
= lastmod
->len
;
360 z1
.v
= zp
->v
+ modlen
- 1;
361 z1
.len
= len
- modlen
+ 1;
362 z1
.sign
= z2
.sign
= z3
.sign
= 0;
363 if (z1
.len
> modlen
+ 1) {
364 math_error("Bad call to zmod5!!!");
367 z2
.v
= lastmodinv
->v
+ modlen
+ 1 - z1
.len
;
368 z2
.len
= lastmodinv
->len
- modlen
- 1 + z1
.len
;
370 z3
.v
= tmp1
.v
+ z1
.len
;
371 z3
.len
= tmp1
.len
- z1
.len
;
373 zmul(z3
, *lastmod
, &tmp2
);
380 f
= (FULL
) *a
- (FULL
) *b
++ - (FULL
) u
;
382 u
= - (HALF
) (f
>> BASEB
);
386 if (tmp2
.len
> modlen
)
387 f
= (FULL
) *a
- (FULL
) *b
- (FULL
) u
;
389 f
= (FULL
) *a
- (FULL
) u
;
392 while (len
> 0 && *--a
== 0)
398 while (len
> 0 && zrel(*zp
, *lastmod
) >= 0) {
401 math_error("Too many subtractions in zmod5");
409 f
= (FULL
) *a
- (FULL
) *b
++ - (FULL
) u
;
411 u
= - (HALF
) (f
>> BASEB
);
414 f
= (FULL
) *a
- (FULL
) u
;
417 while (len
> 0 && *--a
== 0)
426 zmod6(ZVALUE z1
, ZVALUE
*res
)
428 LEN len
, modlen
, len0
;
432 if (ziszero(z1
) || zisone(*lastmod
)) {
439 modlen
= lastmod
->len
;
441 while (zrel(ztmp
, *lastmod
) >= 0) {
445 if (len
> 2 * modlen
) {
446 zp0
.len
= 2 * modlen
;
447 len0
= len
- 2 * modlen
;
449 zp0
.v
= ztmp
.v
+ len
- zp0
.len
;
451 len
= len0
+ zp0
.len
;
452 while (len
> 0 && ztmp
.v
[len
- 1] == 0)
462 zsub(*lastmod
, ztmp
, res
);
471 * Compute the result of raising one number to a power modulo another number.
472 * That is, this computes: a^b (modulo c).
473 * This calculates the result by examining the power POWBITS bits at a time,
474 * using a small table of POWNUMS low powers to calculate powers for those bits,
475 * and repeated squaring and multiplying by the partial powers to generate
476 * the complete power. If the power being raised to is high enough, then
477 * this uses the REDC algorithm to avoid doing many divisions. When using
478 * REDC, multiple calls to this routine using the same modulus will be
482 zpowermod(ZVALUE z1
, ZVALUE z2
, ZVALUE z3
, ZVALUE
*res
)
484 HALF
*hp
; /* pointer to current word of the power */
485 REDC
*rp
; /* REDC information to be used */
486 ZVALUE
*pp
; /* pointer to low power table */
487 ZVALUE ans
, temp
; /* calculation values */
488 ZVALUE modpow
; /* current small power */
489 ZVALUE lowpowers
[POWNUMS
]; /* low powers */
491 int curshift
; /* shift value for word of power */
492 HALF curhalf
; /* current word of power */
493 unsigned int curpow
; /* current low power */
494 unsigned int curbit
; /* current bit of low power */
497 if (zisneg(z3
) || ziszero(z3
)) {
498 math_error("Non-positive modulus in zpowermod");
502 math_error("Negative power in zpowermod");
508 * Check easy cases first.
510 if ((ziszero(z1
) && !ziszero(z2
)) || zisunit(z3
)) {
511 /* 0^(non_zero) or x^y mod 1 always produces zero */
515 if (ziszero(z2
)) { /* x^0 == 1 */
519 if (zistwo(z3
)) { /* mod 2 */
526 if (zisunit(z1
) && (!z1
.sign
|| ziseven(z2
))) {
527 /* 1^x or (-1)^(2x) */
533 * Normalize the number being raised to be non-negative and to lie
534 * within the modulo range. Then check for zero or one specially.
537 if (zisneg(z1
) || zrel(z1
, z3
) >= 0) {
538 zmod(z1
, z3
, &ztmp
, 0);
555 * If modulus is large enough use zmod5
557 if (z3
.len
>= conf
->pow2
) {
558 if (havelastmod
&& zcmp(z3
, *lastmod
)) {
565 zbitvalue(2 * z3
.len
* BASEB
, &temp
);
566 zquo(temp
, z3
, lastmodinv
, 0);
572 for (pp
= &lowpowers
[2]; pp
<= &lowpowers
[POWNUMS
-1]; pp
++) {
576 lowpowers
[0] = _one_
;
580 hp
= &z2
.v
[z2
.len
- 1];
582 curshift
= BASEB
- POWBITS
;
583 while (curshift
&& ((curhalf
>> curshift
) == 0))
587 * Calculate the result by examining the power POWBITS bits at
588 * a time, and use the table of low powers at each iteration.
591 curpow
= (curhalf
>> curshift
) & (POWNUMS
- 1);
592 pp
= &lowpowers
[curpow
];
595 * If the small power is not yet saved in the table,
596 * then calculate it and remember it in the table for
608 pp
= &lowpowers
[curbit
];
610 zsquare(lowpowers
[curbit
/2],
616 if (curbit
& curpow
) {
617 zmul(*pp
, modpow
, &temp
);
620 zcopy(temp
, &modpow
);
624 pp
= &lowpowers
[curpow
];
632 * If the power is nonzero, then accumulate the small
633 * power into the result.
636 zmul(ans
, *pp
, &temp
);
644 * Select the next POWBITS bits of the power, if
645 * there is any more to generate.
652 curshift
= BASEB
- POWBITS
;
656 * Square the result POWBITS times to make room for
657 * the next chunk of bits.
659 for (i
= 0; i
< POWBITS
; i
++) {
668 for (pp
= &lowpowers
[2]; pp
<= &lowpowers
[POWNUMS
-1]; pp
++) {
679 * If the modulus is odd and small enough then use
680 * the REDC algorithm. The size where this is done is configurable.
682 if (z3
.len
< conf
->redc2
&& zisodd(z3
)) {
683 if (powermodredc
&& zcmp(powermodredc
->mod
, z3
)) {
684 zredcfree(powermodredc
);
687 if (powermodredc
== NULL
)
688 powermodredc
= zredcalloc(z3
);
690 zredcencode(rp
, z1
, &temp
);
691 zredcpower(rp
, temp
, z2
, &z1
);
693 zredcdecode(rp
, z1
, res
);
699 * Modulus or power is small enough to perform the power raising
700 * directly. Initialize the table of powers.
702 for (pp
= &lowpowers
[2]; pp
<= &lowpowers
[POWNUMS
-1]; pp
++) {
706 lowpowers
[0] = _one_
;
710 hp
= &z2
.v
[z2
.len
- 1];
712 curshift
= BASEB
- POWBITS
;
713 while (curshift
&& ((curhalf
>> curshift
) == 0))
717 * Calculate the result by examining the power POWBITS bits at a time,
718 * and use the table of low powers at each iteration.
721 curpow
= (curhalf
>> curshift
) & (POWNUMS
- 1);
722 pp
= &lowpowers
[curpow
];
725 * If the small power is not yet saved in the table, then
726 * calculate it and remember it in the table for future use.
734 for (curbit
= 0x2; curbit
<= curpow
; curbit
*= 2) {
735 pp
= &lowpowers
[curbit
];
737 zsquare(lowpowers
[curbit
/2], &temp
);
738 zmod(temp
, z3
, pp
, 0);
741 if (curbit
& curpow
) {
742 zmul(*pp
, modpow
, &temp
);
744 zmod(temp
, z3
, &modpow
, 0);
748 pp
= &lowpowers
[curpow
];
756 * If the power is nonzero, then accumulate the small power
760 zmul(ans
, *pp
, &temp
);
762 zmod(temp
, z3
, &ans
, 0);
767 * Select the next POWBITS bits of the power, if there is
768 * any more to generate.
775 curshift
= BASEB
- POWBITS
;
779 * Square the result POWBITS times to make room for the next
782 for (i
= 0; i
< POWBITS
; i
++) {
785 zmod(temp
, z3
, &ans
, 0);
790 for (pp
= &lowpowers
[2]; pp
<= &lowpowers
[POWNUMS
-1]; pp
++) {
800 * Given a positive odd N-word integer z, evaluate minv(-z, BASEB^N)
803 zredcmodinv(ZVALUE z
, ZVALUE
*res
)
835 f
= (FULL
) v
* (FULL
) *b
++ + (FULL
) *a
++;
838 f
= (FULL
) v
* (FULL
) *b
++ + (FULL
) *a
+ (f
>> BASEB
);
841 while (j
> 0 && *++a0
== 0)
855 * Initialize the REDC algorithm for a particular modulus,
856 * returning a pointer to a structure that is used for other
857 * REDC calls. An error is generated if the structure cannot
858 * be allocated. The modulus must be odd and positive.
861 * z1 modulus to initialize for
864 zredcalloc(ZVALUE z1
)
866 REDC
*rp
; /* REDC information */
870 if (ziseven(z1
) || zisneg(z1
)) {
871 math_error("REDC requires positive odd modulus");
875 rp
= (REDC
*) malloc(sizeof(REDC
));
877 math_error("Cannot allocate REDC structure");
882 * Round up the binary modulus to the next power of two
883 * which is at a word boundary. Then the shift and modulo
884 * operations mod the binary modulus can be done very cheaply.
885 * Calculate the REDC format for the number 1 for future use.
888 zredcmodinv(z1
, &rp
->inv
);
889 bit
= zhighbit(z1
) + 1;
891 bit
+= (BASEB
- (bit
% BASEB
));
892 zbitvalue(bit
, &tmp
);
893 zmod(tmp
, rp
->mod
, &rp
->one
, 0);
895 rp
->len
= (LEN
)(bit
/ BASEB
);
901 * Free any numbers associated with the specified REDC structure,
902 * and then the REDC structure itself.
905 * rp REDC information to be cleared
918 * Convert a normal number into the specified REDC format.
919 * The number to be converted can be negative or out of modulo range.
920 * The resulting number can be used for multiplying, adding, subtracting,
921 * or comparing with any other such converted numbers, as if the numbers
922 * were being calculated modulo the number which initialized the REDC
923 * information. When the final value is unconverted, the result is the
924 * same as if the usual operations were done with the original numbers.
927 * rp REDC information
928 * z1 number to be converted
929 * res returned converted number
932 zredcencode(REDC
*rp
, ZVALUE z1
, ZVALUE
*res
)
937 * Confirm or initialize lastmod information when modulus is a
941 if (rp
->len
>= conf
->pow2
) {
942 if (havelastmod
&& zcmp(rp
->mod
, *lastmod
)) {
948 zcopy(rp
->mod
, lastmod
);
949 zbitvalue(2 * rp
->len
* BASEB
, &tmp1
);
950 zquo(tmp1
, rp
->mod
, lastmodinv
, 0);
956 * Handle the cases 0, 1, -1, and 2 specially since these are
957 * easy to calculate. Zero transforms to zero, and the others
958 * can be obtained from the precomputed REDC format for 1 since
959 * addition and subtraction act normally for REDC format numbers.
970 zsub(rp
->mod
, rp
->one
, res
);
974 zadd(rp
->one
, rp
->one
, &tmp1
);
975 if (zrel(tmp1
, rp
->mod
) < 0) {
979 zsub(tmp1
, rp
->mod
, res
);
985 * Not a trivial number to convert, so do the full transformation.
987 zshift(z1
, rp
->len
* BASEB
, &tmp1
);
988 if (rp
->len
< conf
->pow2
)
989 zmod(tmp1
, rp
->mod
, res
, 0);
997 * The REDC algorithm used to convert numbers out of REDC format and also
998 * used after multiplication of two REDC numbers. Using this routine
999 * avoids any divides, replacing the divide by two multiplications.
1000 * If the numbers are very large, then these two multiplies will be
1001 * quicker than the divide, since dividing is harder than multiplying.
1004 * rp REDC information
1005 * z1 number to be transformed
1006 * res returned transformed number
1009 zredcdecode(REDC
*rp
, ZVALUE z1
, ZVALUE
*res
)
1027 * Check first for the special values for 0 and 1 that are easy.
1033 if ((z1
.len
== rp
->one
.len
) && (z1
.v
[0] == rp
->one
.v
[0]) &&
1034 (zcmp(z1
, rp
->one
) == 0)) {
1043 if (z1
.len
> modlen
) {
1044 ztop
.v
= z1
.v
+ modlen
;
1045 ztop
.len
= z1
.len
- modlen
;
1047 if (zrel(ztop
, rp
->mod
) >= 0) {
1048 zmod(ztop
, rp
->mod
, &ztmp
, 0);
1053 while (len
> 0 && *--h1
== 0)
1065 if (rp
->mod
.len
< conf
->pow2
) {
1066 Ninv
= rp
->inv
.v
[0];
1069 res
->v
= alloc(modlen
);
1072 for (i
= 0; i
< modlen
; i
++) {
1078 muln
= (HALF
) ((f
& BASE1
) * Ninv
);
1079 f
= ((muln
* (FULL
) *h3
++) + f
) >> BASEB
;
1082 f
+= (muln
* (FULL
) *h3
++) + (FULL
) *hd
;
1090 while (*--hd
== 0 && len
> 1)
1096 /* Here 0 < z1 < 2^bitnum */
1099 * First calculate the following:
1100 * tmp2 = ((z1 * inv) % 2^bitnum.
1101 * The mod operations can be done with no work since the bit
1102 * number was selected as a multiple of the word size. Just
1103 * reduce the sizes of the numbers as required.
1105 zmul(z1
, rp
->inv
, &tmp2
);
1106 if (tmp2
.len
> modlen
) {
1107 h1
= tmp2
.v
+ modlen
;
1109 while (len
> 0 && *--h1
== 0)
1115 * Next calculate the following:
1116 * res = (z1 + tmp2 * modulus) / 2^bitnum
1117 * Since 0 < z1 < 2^bitnum and the division is always exact,
1118 * the quotient can be evaluated by rounding up
1119 * (tmp2 * modulus)/2^bitnum. This can be achieved by defining
1120 * zp1 by an appropriate shift and then adding one.
1122 zmul(tmp2
, rp
->mod
, &tmp1
);
1124 if (tmp1
.len
> modlen
) {
1125 zp1
.v
= tmp1
.v
+ modlen
;
1126 zp1
.len
= tmp1
.len
- modlen
;
1128 zadd(zp1
, _one_
, res
);
1135 zadd(*res
, ztop
, &tmp1
);
1143 * Finally do a final modulo by a simple subtraction if necessary.
1144 * This is all that is needed because the previous calculation is
1145 * guaranteed to always be less than twice the modulus.
1148 if (zrel(*res
, rp
->mod
) >= 0) {
1149 zsub(*res
, rp
->mod
, &tmp1
);
1153 if (sign
&& !ziszero(*res
)) {
1154 zsub(rp
->mod
, *res
, &tmp1
);
1163 * Multiply two numbers in REDC format together producing a result also
1164 * in REDC format. If the result is converted back to a normal number,
1165 * then the result is the same as the modulo'd multiplication of the
1166 * original numbers before they were converted to REDC format. This
1167 * calculation is done in one of two ways, depending on the size of the
1168 * modulus. For large numbers, the REDC definition is used directly
1169 * which involves three multiplies overall. For small numbers, a
1170 * complicated routine is used which does the indicated multiplication
1171 * and the REDC algorithm at the same time to produce the result.
1174 * rp REDC information
1175 * z1 first REDC number to be multiplied
1176 * z2 second REDC number to be multiplied
1177 * res resulting REDC number
1180 zredcmul(REDC
*rp
, ZVALUE z1
, ZVALUE z2
, ZVALUE
*res
)
1197 ZVALUE z1tmp
, z2tmp
;
1200 sign
= z1
.sign
^ z2
.sign
;
1204 if (zrel(z1
, rp
->mod
) >= 0) {
1205 zmod(z1
, rp
->mod
, &z1tmp
, 0);
1209 if (zrel(z2
, rp
->mod
) >= 0) {
1210 zmod(z2
, rp
->mod
, &z2tmp
, 0);
1216 * Check for special values which we easily know the answer.
1218 if (ziszero(z1
) || ziszero(z2
)) {
1227 if ((z1
.len
== rp
->one
.len
) && (z1
.v
[0] == rp
->one
.v
[0]) &&
1228 (zcmp(z1
, rp
->one
) == 0)) {
1230 zsub(rp
->mod
, z2
, res
);
1240 if ((z2
.len
== rp
->one
.len
) && (z2
.v
[0] == rp
->one
.v
[0]) &&
1241 (zcmp(z2
, rp
->one
) == 0)) {
1243 zsub(rp
->mod
, z1
, res
);
1254 * If the size of the modulus is large, then just do the multiply,
1255 * followed by the two multiplies contained in the REDC routine.
1256 * This will be quicker than directly doing the REDC calculation
1257 * because of the O(N^1.585) speed of the multiplies. The size
1258 * of the number which this is done is configurable.
1260 if (rp
->mod
.len
>= conf
->redc2
) {
1262 zredcdecode(rp
, tmp
, res
);
1264 if (sign
&& !ziszero(*res
)) {
1265 zsub(rp
->mod
, *res
, &tmp
);
1277 * The number is small enough to calculate by doing the O(N^2) REDC
1278 * algorithm directly. This algorithm performs the multiplication and
1279 * the reduction at the same time. Notice the obscure facts that
1280 * only the lowest word of the inverse value is used, and that
1281 * there is no shifting of the partial products as there is in a
1284 modlen
= rp
->mod
.len
;
1285 Ninv
= rp
->inv
.v
[0];
1288 * Allocate the result and clear it.
1289 * The size of the result will be equal to or smaller than
1294 res
->v
= alloc(modlen
);
1301 * Do this outermost loop over all the digits of z1.
1307 * Start off with the next digit of z1, the first
1308 * digit of z2, and the first digit of the modulus.
1310 mulb
= (FULL
) *h1
++;
1314 sival1
.ivalue
= mulb
* ((FULL
) *h2
++) + ((FULL
) *hd
++);
1315 muln
= ((HALF
) (sival1
.silow
* Ninv
));
1316 sival2
.ivalue
= muln
* ((FULL
) *h3
++) + ((FULL
) sival1
.silow
);
1317 carry
.ivalue
= ((FULL
) sival1
.sihigh
) + ((FULL
) sival2
.sihigh
);
1320 * Do this innermost loop for each digit of z2, except
1321 * for the first digit which was just done above.
1324 while (--len2
> 0) {
1325 sival1
.ivalue
= mulb
* ((FULL
) *h2
++)
1326 + ((FULL
) *hd
) + ((FULL
) carry
.silow
);
1327 sival2
.ivalue
= muln
* ((FULL
) *h3
++)
1328 + ((FULL
) sival1
.silow
);
1329 carry
.ivalue
= ((FULL
) sival1
.sihigh
)
1330 + ((FULL
) sival2
.sihigh
)
1331 + ((FULL
) carry
.sihigh
);
1333 hd
[-1] = sival2
.silow
;
1338 * Now continue the loop as necessary so the total number
1339 * of iterations is equal to the size of the modulus.
1340 * This acts as if the innermost loop was repeated for
1341 * high digits of z2 that are zero.
1343 len2
= modlen
- z2
.len
;
1345 sival2
.ivalue
= muln
* ((FULL
) *h3
++)
1347 + ((FULL
) carry
.silow
);
1348 carry
.ivalue
= ((FULL
) sival2
.sihigh
)
1349 + ((FULL
) carry
.sihigh
);
1351 hd
[-1] = sival2
.silow
;
1355 carry
.ivalue
+= topdigit
;
1356 hd
[-1] = carry
.silow
;
1357 topdigit
= carry
.sihigh
;
1361 * Now continue the loop as necessary so the total number
1362 * of iterations is equal to the size of the modulus.
1363 * This acts as if the outermost loop was repeated for high
1364 * digits of z1 that are zero.
1366 len
= modlen
- z1
.len
;
1369 * Start off with the first digit of the modulus.
1373 muln
= ((HALF
) (*hd
* Ninv
));
1374 sival2
.ivalue
= muln
* ((FULL
) *h3
++) + (FULL
) *hd
++;
1375 carry
.ivalue
= ((FULL
) sival2
.sihigh
);
1378 * Do this innermost loop for each digit of the modulus,
1379 * except for the first digit which was just done above.
1382 while (--len2
> 0) {
1383 sival2
.ivalue
= muln
* ((FULL
) *h3
++)
1384 + ((FULL
) *hd
) + ((FULL
) carry
.silow
);
1385 carry
.ivalue
= ((FULL
) sival2
.sihigh
)
1386 + ((FULL
) carry
.sihigh
);
1388 hd
[-1] = sival2
.silow
;
1391 carry
.ivalue
+= topdigit
;
1392 hd
[-1] = carry
.silow
;
1393 topdigit
= carry
.sihigh
;
1397 * Determine the true size of the result, taking the top digit of
1398 * the current result into account. The top digit is not stored in
1399 * the number because it is temporary and would become zero anyway
1400 * after the final subtraction is done.
1402 if (topdigit
== 0) {
1404 while (*--hd
== 0 && len
> 1) {
1410 * Compare the result with the modulus.
1411 * If it is less than the modulus, then the calculation is complete.
1414 if (zrel(*res
, rp
->mod
) < 0) {
1419 if (sign
&& !ziszero(*res
)) {
1420 zsub(rp
->mod
, *res
, &tmp
);
1429 * Do a subtraction to reduce the result to a value less than
1430 * the modulus. The REDC algorithm guarantees that a single subtract
1431 * is all that is needed. Ignore any borrowing from the possible
1432 * highest word of the current result because that would affect
1433 * only the top digit value that was not stored and would become
1441 carry
.ivalue
= BASE1
- ((FULL
) *hd
) + ((FULL
) *h1
++)
1442 + ((FULL
) carry
.silow
);
1443 *hd
++ = (HALF
)(BASE1
- carry
.silow
);
1444 carry
.silow
= carry
.sihigh
;
1448 * Now finally recompute the size of the result.
1451 hd
= &res
->v
[len
- 1];
1452 while ((*hd
== 0) && (len
> 1)) {
1461 if (sign
&& !ziszero(*res
)) {
1462 zsub(rp
->mod
, *res
, &tmp
);
1470 * Square a number in REDC format producing a result also in REDC format.
1473 * rp REDC information
1474 * z1 REDC number to be squared
1475 * res resulting REDC number
1478 zredcsquare(REDC
*rp
, ZVALUE z1
, ZVALUE
*res
)
1500 if (zrel(z1
, rp
->mod
) >= 0) {
1501 zmod(z1
, rp
->mod
, &ztmp
, 0);
1510 if ((z1
.len
== rp
->one
.len
) && (z1
.v
[0] == rp
->one
.v
[0]) &&
1511 (zcmp(z1
, rp
->one
) == 0)) {
1520 * If the modulus is small enough, then call the multiply
1521 * routine to produce the result. Otherwise call the O(N^1.585)
1522 * routines to get the answer.
1524 if (rp
->mod
.len
>= conf
->redc2
1525 || 3 * z1
.len
< 2 * rp
->mod
.len
) {
1527 zredcdecode(rp
, tmp
, res
);
1533 modlen
= rp
->mod
.len
;
1534 Ninv
= rp
->inv
.v
[0];
1538 res
->v
= alloc(modlen
);
1544 for (i
= 0; i
< z1
.len
; i
++) {
1545 mulb
= (FULL
) *h1
++;
1550 sival1
.ivalue
= mulb
* mulb
;
1551 muln
= (HALF
) (sival1
.silow
* Ninv
);
1552 sival2
.ivalue
= muln
* ((FULL
) *h3
++)
1553 + (FULL
) sival1
.silow
;
1554 carry
.ivalue
= (FULL
) sival1
.sihigh
1555 + (FULL
) sival2
.sihigh
;
1558 muln
= (HALF
) (*hd
* Ninv
);
1559 f
= (muln
* ((FULL
) *h3
++) + (FULL
) *hd
++) >> BASEB
;
1562 f
+= muln
* ((FULL
) *h3
++) + *hd
;
1568 sival1
.ivalue
= mulb
* mulb
+ (FULL
) carry
.silow
;
1569 sival2
.ivalue
= muln
* ((FULL
) *h3
++)
1571 + (FULL
) sival1
.silow
;
1572 carry
.ivalue
= (FULL
) sival1
.sihigh
1573 + (FULL
) sival2
.sihigh
1574 + (FULL
) carry
.sihigh
;
1575 hd
[-1] = sival2
.silow
;
1580 sival1
.ivalue
= mulb
* ((FULL
) *h2
++);
1581 sival2
.ivalue
= ((FULL
) sival1
.silow
<< 1)
1582 + muln
* ((FULL
) *h3
++);
1583 sival3
.ivalue
= (FULL
) sival2
.silow
1585 + (FULL
) carry
.silow
;
1586 carry
.ivalue
= ((FULL
) sival1
.sihigh
<< 1)
1587 + (FULL
) sival2
.sihigh
1588 + (FULL
) sival3
.sihigh
1589 + (FULL
) carry
.sihigh
;
1590 hd
[-1] = sival3
.silow
;
1593 j
= modlen
- z1
.len
;
1595 sival1
.ivalue
= muln
* ((FULL
) *h3
++)
1597 + (FULL
) carry
.silow
;
1598 carry
.ivalue
= (FULL
) sival1
.sihigh
1599 + (FULL
) carry
.sihigh
;
1600 hd
[-1] = sival1
.silow
;
1603 carry
.ivalue
+= (FULL
) topdigit
;
1604 hd
[-1] = carry
.silow
;
1605 topdigit
= carry
.sihigh
;
1607 i
= modlen
- z1
.len
;
1611 muln
= (HALF
) (*hd
* Ninv
);
1612 sival1
.ivalue
= muln
* ((FULL
) *h3
++) + (FULL
) *hd
++;
1613 carry
.ivalue
= (FULL
) sival1
.sihigh
;
1616 sival1
.ivalue
= muln
* ((FULL
) *h3
++)
1618 + (FULL
) carry
.silow
;
1619 carry
.ivalue
= (FULL
) sival1
.sihigh
1620 + (FULL
) carry
.sihigh
;
1621 hd
[-1] = sival1
.silow
;
1624 carry
.ivalue
+= (FULL
) topdigit
;
1625 hd
[-1] = carry
.silow
;
1626 topdigit
= carry
.sihigh
;
1628 if (topdigit
== 0) {
1630 while (*--hd
== 0 && len
> 1) {
1634 if (zrel(*res
, rp
->mod
) < 0) {
1646 carry
.ivalue
= BASE1
- ((FULL
) *hd
) + ((FULL
) *h1
++)
1647 + ((FULL
) carry
.silow
);
1648 *hd
++ = (HALF
)(BASE1
- carry
.silow
);
1649 carry
.silow
= carry
.sihigh
;
1653 hd
= &res
->v
[len
- 1];
1654 while ((*hd
== 0) && (len
> 1)) {
1665 * Compute the result of raising a REDC format number to a power.
1666 * The result is within the range 0 to the modulus - 1.
1667 * This calculates the result by examining the power POWBITS bits at a time,
1668 * using a small table of POWNUMS low powers to calculate powers for those bits,
1669 * and repeated squaring and multiplying by the partial powers to generate
1670 * the complete power.
1673 * rp REDC information
1674 * z1 REDC number to be raised
1675 * z2 normal number to raise number to
1679 zredcpower(REDC
*rp
, ZVALUE z1
, ZVALUE z2
, ZVALUE
*res
)
1681 HALF
*hp
; /* pointer to current word of the power */
1682 ZVALUE
*pp
; /* pointer to low power table */
1683 ZVALUE ans
, temp
; /* calculation values */
1685 ZVALUE modpow
; /* current small power */
1686 ZVALUE lowpowers
[POWNUMS
]; /* low powers */
1687 int curshift
; /* shift value for word of power */
1688 HALF curhalf
; /* current word of power */
1689 unsigned int curpow
; /* current low power */
1690 unsigned int curbit
; /* current bit of low power */
1695 math_error("Negative power in zredcpower");
1699 if (zisunit(rp
->mod
)) {
1704 sign
= zisodd(z2
) ? z1
.sign
: 0;
1707 if (zrel(z1
, rp
->mod
) >= 0) {
1708 zmod(z1
, rp
->mod
, &ztmp
, 0);
1712 * Check for zero or the REDC format for one.
1723 if (zcmp(z1
, rp
->one
) == 0) {
1725 zsub(rp
->mod
, rp
->one
, res
);
1727 zcopy(rp
->one
, res
);
1734 * See if the number being raised is the REDC format for -1.
1735 * If so, then the answer is the REDC format for one or minus one.
1736 * To do this check, calculate the REDC format for -1.
1738 if (((HALF
)(z1
.v
[0] + rp
->one
.v
[0])) == rp
->mod
.v
[0]) {
1739 zsub(rp
->mod
, rp
->one
, &temp
);
1740 if (zcmp(z1
, temp
) == 0) {
1741 if (zisodd(z2
) ^ sign
) {
1748 zcopy(rp
->one
, res
);
1756 for (pp
= &lowpowers
[2]; pp
< &lowpowers
[POWNUMS
]; pp
++)
1758 zcopy(rp
->one
, &lowpowers
[0]);
1759 zcopy(z1
, &lowpowers
[1]);
1760 zcopy(rp
->one
, &ans
);
1762 hp
= &z2
.v
[z2
.len
- 1];
1764 curshift
= BASEB
- POWBITS
;
1765 while (curshift
&& ((curhalf
>> curshift
) == 0))
1766 curshift
-= POWBITS
;
1769 * Calculate the result by examining the power POWBITS bits at a time,
1770 * and use the table of low powers at each iteration.
1773 curpow
= (curhalf
>> curshift
) & (POWNUMS
- 1);
1774 pp
= &lowpowers
[curpow
];
1777 * If the small power is not yet saved in the table, then
1778 * calculate it and remember it in the table for future use.
1784 zcopy(rp
->one
, &modpow
);
1786 for (curbit
= 0x2; curbit
<= curpow
; curbit
*= 2) {
1787 pp
= &lowpowers
[curbit
];
1789 zredcsquare(rp
, lowpowers
[curbit
/2],
1791 if (curbit
& curpow
) {
1792 zredcmul(rp
, *pp
, modpow
, &temp
);
1797 pp
= &lowpowers
[curpow
];
1805 * If the power is nonzero, then accumulate the small power
1809 zredcmul(rp
, ans
, *pp
, &temp
);
1815 * Select the next POWBITS bits of the power, if there is
1816 * any more to generate.
1818 curshift
-= POWBITS
;
1823 curshift
= BASEB
- POWBITS
;
1827 * Square the result POWBITS times to make room for the next
1830 for (i
= 0; i
< POWBITS
; i
++) {
1831 zredcsquare(rp
, ans
, &temp
);
1837 for (pp
= lowpowers
; pp
< &lowpowers
[POWNUMS
]; pp
++) {
1841 if (sign
&& !ziszero(ans
)) {
1842 zsub(rp
->mod
, ans
, res
);
1853 * zhnrmod - compute z mod h*2^n+r
1855 * We compute v mod h*2^n+r, where h>0, n>0, abs(r) <= 1, as follows:
1857 * Let v = b*2^n + a, where 0 <= a < 2^n
1859 * Now v mod h*2^n+r == b*2^n + a mod h*2^n+r,
1860 * and thus v mod h*2^n+r == b*2^n mod h*2^n+r + a mod h*2^n+r.
1862 * Because 0 <= a < 2^n < h*2^n+r, a mod h*2^n+r == a.
1863 * Thus v mod h*2^n+r == b*2^n mod h*2^n+r + a.
1865 * It can be shown that b*2^n mod h*2^n == 2^n * (b mod h).
1867 * Thus for r == 0, v mod h*2^n+r == (2^n)*(b mod h) + a.
1869 * It can be shown that v mod 2^n-1 == a+b mod 2^n-1.
1871 * Thus for r == -1, v mod h*2^n+r == (2^n)*(b mod h) + a + int(b/h).
1873 * It can be shown that v mod 2^n+1 == a-b mod 2^n+1.
1875 * Thus for r == +1, v mod h*2^n+r == (2^n)*(b mod h) + a - int(b/h).
1877 * Therefore, v mod h*2^n+r == (2^n)*(b mod h) + a - r*int(b/h).
1879 * The above proof leads to the following calc resource file which computes
1880 * the value z mod h*2^n+r:
1882 * define hnrmod(v,h,n,r)
1884 * local a,b,modulus,tquo,tmod,lbit,ret;
1886 * if (!isint(h) || h < 1) {
1887 * quit "h must be an integer be > 0";
1889 * if (!isint(n) || n < 1) {
1890 * quit "n must be an integer be > 0";
1892 * if (r != 1 && r != 0 && r != -1) {
1893 * quit "r must be -1, 0 or 1";
1903 * if (modulus <= 2^31-1) {
1904 * return v % modulus;
1909 * if (highbit(ret) < n) {
1920 * quomod(b, h, tquo, tmod);
1921 * ret = tmod<<n + a + tquo;
1928 * ret = (b%h)<<n + a;
1933 * ret = ((a > b) ? a-b : modulus+a-b);
1935 * quomod(b, h, tquo, tmod);
1936 * tmod = tmod<<n + a;
1937 * ret = ((tmod >= tquo) ? tmod-tquo : modulus+tmod-tquo);
1941 * } while (ret > modulus);
1942 * ret = ((ret < 0) ? ret+modlus : ((ret == modulus) ? 0 : ret));
1947 * This function implements the above calc resource file.
1950 * v take mod of this value, v >= 0
1951 * zh h from modulus h*2^n+r, h > 0
1952 * zn n from modulus h*2^n+r, n > 0
1953 * zr r from modulus h*2^n+r, abs(r) <= 1
1957 zhnrmod(ZVALUE v
, ZVALUE zh
, ZVALUE zn
, ZVALUE zr
, ZVALUE
*res
)
1959 ZVALUE a
; /* lower n bits of v */
1960 ZVALUE b
; /* bits above the lower n bits of v */
1961 ZVALUE h
; /* working zh value */
1962 ZVALUE modulus
; /* h^2^n + r */
1963 ZVALUE tquo
; /* b // h */
1964 ZVALUE tmod
; /* b % h or (b%h)<<n + a */
1965 ZVALUE t
; /* temp ZVALUE */
1966 ZVALUE t2
; /* temp ZVALUE */
1967 ZVALUE ret
; /* return value, what *res is set to */
1968 long n
; /* integer value of zn */
1969 long r
; /* integer value of zr */
1970 long hbit
; /* highbit(res) */
1971 long lbit
; /* lowbit(h) */
1972 int zrelval
; /* return value of zrel() */
1973 int hisone
; /* 1 => h == 1, 0 => h != 1 */
1978 if (zisneg(zh
) || ziszero(zh
)) {
1979 math_error("h must be > 0");
1982 if (zisneg(zn
) || ziszero(zn
)) {
1983 math_error("n must be > 0");
1987 math_error("n must be < 2^31");
1990 if (!zisabsleone(zr
)) {
1991 math_error("r must be -1, 0 or 1");
2004 /* lbit = lowbit(h); */
2006 /* if (lbit > 0) { n += lbit; h >>= lbit; } */
2009 zshift(zh
, -lbit
, &h
);
2013 /* modulus = h<<n+r; */
2017 zadd(t
, _one_
, &modulus
);
2024 zsub(t
, _one_
, &modulus
);
2028 /* if (modulus <= MAXLONG) { return v % modulus; } */
2029 if (!zgtmaxlong(modulus
)) {
2030 itoz(zmodi(v
, ztolong(modulus
)), res
);
2041 * shift-add modulus loop
2047 * split ret into to chunks, the lower n bits
2048 * and everything above the lower n bits
2050 /* if (highbit(ret) < n) { break; } */
2051 hbit
= (long)zhighbit(ret
);
2053 zrelval
= (zcmp(ret
, modulus
) ? -1 : 0);
2057 zshift(ret
, -n
, &b
);
2059 /* a = ret - (b<<n); */
2061 a
.len
= (n
+BASEB
-1)/BASEB
;
2063 memcpy(a
.v
, ret
.v
, a
.len
*sizeof(HALF
));
2065 a
.v
[a
.len
- 1] &= lowhalf
[n
% BASEB
];
2070 * switch depending on r == -1, 0 or 1
2073 case -1: /* v mod h*2^h-1 */
2074 /* if (h == 1) ... */
2082 /* quomod(b, h, tquo, tmod); */
2083 (void) zdiv(b
, h
, &tquo
, &tmod
, 0);
2084 /* ret = tmod<<n + a + tquo; */
2085 zshift(tmod
, n
, &t
);
2096 case 0: /* v mod h*2^h-1 */
2097 /* if (h == 1) ... */
2105 /* ret = (b%h)<<n + a; */
2106 (void) zmod(b
, h
, &tmod
, 0);
2107 zshift(tmod
, n
, &t
);
2115 case 1: /* v mod h*2^h-1 */
2116 /* if (h == 1) ... */
2124 /* quomod(b, h, tquo, tmod); */
2125 (void) zdiv(b
, h
, &tquo
, &tmod
, 0);
2126 /* tmod = tmod<<n + a; */
2127 zshift(tmod
, n
, &t
);
2131 /* ret = tmod-tquo; */
2133 zsub(tmod
, tquo
, &ret
);
2142 /* ... while (abs(ret) > modulus); */
2143 } while ((zrelval
= zabsrel(ret
, modulus
)) > 0);
2144 /* ret = ((ret < 0) ? ret+modlus : ((ret == modulus) ? 0 : ret)); */
2146 zadd(ret
, modulus
, &t
);
2149 } else if (zrelval
== 0) {