modified: src1/input.c
[GalaxyCodeBases.git] / c_cpp / etc / calc / zmod.c
blob2268e8ee0ac3b3a5127edb474dd11fab6a4522d3
1 /*
2 * zmod - modulo arithmetic routines
4 * Copyright (C) 1999-2007 David I. Bell, Landon Curt Noll and Ernest Bowen
6 * Primary author: David I. Bell
8 * Calc is open software; you can redistribute it and/or modify it under
9 * the terms of the version 2.1 of the GNU Lesser General Public License
10 * as published by the Free Software Foundation.
12 * Calc is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
15 * Public License for more details.
17 * A copy of version 2.1 of the GNU Lesser General Public License is
18 * distributed with calc under the filename COPYING-LGPL. You should have
19 * received a copy with calc; if not, write to Free Software Foundation, Inc.
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * @(#) $Revision: 30.1 $
23 * @(#) $Id: zmod.c,v 30.1 2007/03/16 11:09:46 chongo Exp $
24 * @(#) $Source: /usr/local/src/bin/calc/RCS/zmod.c,v $
26 * Under source code control: 1991/05/22 23:03:55
27 * File existed as early as: 1991
29 * Share and enjoy! :-) http://www.isthe.com/chongo/tech/comp/calc/
33 * Routines to do modulo arithmetic both normally and also using the REDC
34 * algorithm given by Peter L. Montgomery in Mathematics of Computation,
35 * volume 44, number 170 (April, 1985). For multiple multiplies using
36 * the same large modulus, the REDC algorithm avoids the usual division
37 * by the modulus, instead replacing it with two multiplies or else a
38 * special algorithm. When these two multiplies or the special algorithm
39 * are faster then the division, then the REDC algorithm is better.
43 #include "config.h"
44 #include "zmath.h"
47 #define POWBITS 4 /* bits for power chunks (must divide BASEB) */
48 #define POWNUMS (1<<POWBITS) /* number of powers needed in table */
50 S_FUNC void zmod5(ZVALUE *zp);
51 S_FUNC void zmod6(ZVALUE z1, ZVALUE *res);
52 S_FUNC void zredcmodinv(ZVALUE z1, ZVALUE *res);
54 STATIC REDC *powermodredc = NULL; /* REDC info for raising to power */
56 BOOL havelastmod = FALSE;
57 STATIC ZVALUE lastmod[1];
58 STATIC ZVALUE lastmodinv[1];
62 * Square a number and then mod the result with a second number.
63 * The number to be squared can be negative or out of modulo range.
64 * The result will be in the range 0 to the modulus - 1.
66 * given:
67 * z1 number to be squared
68 * z2 number to take mod with
69 * res result
71 void
72 zsquaremod(ZVALUE z1, ZVALUE z2, ZVALUE *res)
74 ZVALUE tmp;
75 FULL prod;
76 FULL digit;
78 if (ziszero(z2) || zisneg(z2))
79 math_error("Mod of non-positive integer");
80 /*NOTREACHED*/
81 if (ziszero(z1) || zisunit(z2)) {
82 *res = _zero_;
83 return;
87 * If the modulus is a single digit number, then do the result
88 * cheaply. Check especially for a small power of two.
90 if (zistiny(z2)) {
91 digit = z2.v[0];
92 if ((digit & -digit) == digit) { /* NEEDS 2'S COMP */
93 prod = (FULL) z1.v[0];
94 prod = (prod * prod) & (digit - 1);
95 } else {
96 z1.sign = 0;
97 prod = (FULL) zmodi(z1, (long) digit);
98 prod = (prod * prod) % digit;
100 itoz((long) prod, res);
101 return;
105 * The modulus is more than one digit.
106 * Actually do the square and divide if necessary.
108 zsquare(z1, &tmp);
109 if ((tmp.len < z2.len) ||
110 ((tmp.len == z2.len) && (tmp.v[tmp.len-1] < z2.v[z2.len-1]))) {
111 *res = tmp;
112 return;
114 zmod(tmp, z2, res, 0);
115 zfree(tmp);
120 * Calculate the number congruent to the given number whose absolute
121 * value is minimal. The number to be reduced can be negative or out of
122 * modulo range. The result will be within the range -int((modulus-1)/2)
123 * to int(modulus/2) inclusive. For example, for modulus 7, numbers are
124 * reduced to the range [-3, 3], and for modulus 8, numbers are reduced to
125 * the range [-3, 4].
127 * given:
128 * z1 number to find minimum congruence of
129 * z2 number to take mod with
130 * res result
132 void
133 zminmod(ZVALUE z1, ZVALUE z2, ZVALUE *res)
135 ZVALUE tmp1, tmp2;
136 int sign;
137 int cv;
139 if (ziszero(z2) || zisneg(z2)) {
140 math_error("Mod of non-positive integer");
141 /*NOTREACHED*/
143 if (ziszero(z1) || zisunit(z2)) {
144 *res = _zero_;
145 return;
147 if (zistwo(z2)) {
148 if (zisodd(z1))
149 *res = _one_;
150 else
151 *res = _zero_;
152 return;
156 * Do a quick check to see if the number is very small compared
157 * to the modulus. If so, then the result is obvious.
159 if (z1.len < z2.len - 1) {
160 zcopy(z1, res);
161 return;
165 * Now make sure the input number is within the modulo range.
166 * If not, then reduce it to be within range and make the
167 * quick check again.
169 sign = z1.sign;
170 z1.sign = 0;
171 cv = zrel(z1, z2);
172 if (cv == 0) {
173 *res = _zero_;
174 return;
176 tmp1 = z1;
177 if (cv > 0) {
178 z1.sign = (BOOL)sign;
179 zmod(z1, z2, &tmp1, 0);
180 if (tmp1.len < z2.len - 1) {
181 *res = tmp1;
182 return;
184 sign = 0;
188 * Now calculate the difference of the modulus and the absolute
189 * value of the original number. Compare the original number with
190 * the difference, and return the one with the smallest absolute
191 * value, with the correct sign. If the two values are equal, then
192 * return the positive result.
194 zsub(z2, tmp1, &tmp2);
195 cv = zrel(tmp1, tmp2);
196 if (cv < 0) {
197 zfree(tmp2);
198 tmp1.sign = (BOOL)sign;
199 if (tmp1.v == z1.v)
200 zcopy(tmp1, res);
201 else
202 *res = tmp1;
203 } else {
204 if (cv)
205 tmp2.sign = !sign;
206 if (tmp1.v != z1.v)
207 zfree(tmp1);
208 *res = tmp2;
214 * Compare two numbers for equality modulo a third number.
215 * The two numbers to be compared can be negative or out of modulo range.
216 * Returns TRUE if the numbers are not congruent, and FALSE if they are
217 * congruent.
219 * given:
220 * z1 first number to be compared
221 * z2 second number to be compared
222 * z3 modulus
224 BOOL
225 zcmpmod(ZVALUE z1, ZVALUE z2, ZVALUE z3)
227 ZVALUE tmp1, tmp2, tmp3;
228 FULL digit;
229 LEN len;
230 int cv;
232 if (zisneg(z3) || ziszero(z3)) {
233 math_error("Non-positive modulus in zcmpmod");
234 /*NOTREACHED*/
236 if (zistwo(z3))
237 return (((z1.v[0] + z2.v[0]) & 0x1) != 0);
240 * If the two numbers are equal, then their mods are equal.
242 if ((z1.sign == z2.sign) && (z1.len == z2.len) &&
243 (z1.v[0] == z2.v[0]) && (zcmp(z1, z2) == 0))
244 return FALSE;
247 * If both numbers are negative, then we can make them positive.
249 if (zisneg(z1) && zisneg(z2)) {
250 z1.sign = 0;
251 z2.sign = 0;
255 * For small negative numbers, make them positive before comparing.
256 * In any case, the resulting numbers are in tmp1 and tmp2.
258 tmp1 = z1;
259 tmp2 = z2;
260 len = z3.len;
261 digit = z3.v[len - 1];
263 if (zisneg(z1) && ((z1.len < len) ||
264 ((z1.len == len) && (z1.v[z1.len - 1] < digit))))
265 zadd(z1, z3, &tmp1);
267 if (zisneg(z2) && ((z2.len < len) ||
268 ((z2.len == len) && (z2.v[z2.len - 1] < digit))))
269 zadd(z2, z3, &tmp2);
272 * Now compare the two numbers for equality.
273 * If they are equal we are all done.
275 if (zcmp(tmp1, tmp2) == 0) {
276 if (tmp1.v != z1.v)
277 zfree(tmp1);
278 if (tmp2.v != z2.v)
279 zfree(tmp2);
280 return FALSE;
284 * They are not identical. Now if both numbers are positive
285 * and less than the modulus, then they are definitely not equal.
287 if ((tmp1.sign == tmp2.sign) &&
288 ((tmp1.len < len) || (zrel(tmp1, z3) < 0)) &&
289 ((tmp2.len < len) || (zrel(tmp2, z3) < 0))) {
290 if (tmp1.v != z1.v)
291 zfree(tmp1);
292 if (tmp2.v != z2.v)
293 zfree(tmp2);
294 return TRUE;
298 * Either one of the numbers is negative or is large.
299 * So do the standard thing and subtract the two numbers.
300 * Then they are equal if the result is 0 (mod z3).
302 zsub(tmp1, tmp2, &tmp3);
303 if (tmp1.v != z1.v)
304 zfree(tmp1);
305 if (tmp2.v != z2.v)
306 zfree(tmp2);
309 * Compare the result with the modulus to see if it is equal to
310 * or less than the modulus. If so, we know the mod result.
312 tmp3.sign = 0;
313 cv = zrel(tmp3, z3);
314 if (cv == 0) {
315 zfree(tmp3);
316 return FALSE;
318 if (cv < 0) {
319 zfree(tmp3);
320 return TRUE;
324 * We are forced to actually do the division.
325 * The numbers are congruent if the result is zero.
327 zmod(tmp3, z3, &tmp1, 0);
328 zfree(tmp3);
329 if (ziszero(tmp1)) {
330 zfree(tmp1);
331 return FALSE;
332 } else {
333 zfree(tmp1);
334 return TRUE;
340 * Given the address of a positive integer whose word count does not
341 * exceed twice that of the modulus stored at lastmod, to evaluate and store
342 * at that address the value of the integer modulo the modulus.
344 S_FUNC void
345 zmod5(ZVALUE *zp)
347 LEN len, modlen, j;
348 ZVALUE tmp1, tmp2;
349 ZVALUE z1, z2, z3;
350 HALF *a, *b;
351 FULL f;
352 HALF u;
354 int subcount = 0;
356 if (zrel(*zp, *lastmod) < 0)
357 return;
358 modlen = lastmod->len;
359 len = zp->len;
360 z1.v = zp->v + modlen - 1;
361 z1.len = len - modlen + 1;
362 z1.sign = z2.sign = z3.sign = 0;
363 if (z1.len > modlen + 1) {
364 math_error("Bad call to zmod5!!!");
365 /*NOTREACHED*/
367 z2.v = lastmodinv->v + modlen + 1 - z1.len;
368 z2.len = lastmodinv->len - modlen - 1 + z1.len;
369 zmul(z1, z2, &tmp1);
370 z3.v = tmp1.v + z1.len;
371 z3.len = tmp1.len - z1.len;
372 if (z3.len > 0) {
373 zmul(z3, *lastmod, &tmp2);
374 j = modlen;
375 a = zp->v;
376 b = tmp2.v;
377 u = 0;
378 len = modlen;
379 while (j-- > 0) {
380 f = (FULL) *a - (FULL) *b++ - (FULL) u;
381 *a++ = (HALF) f;
382 u = - (HALF) (f >> BASEB);
384 if (z1.len > 1) {
385 len++;
386 if (tmp2.len > modlen)
387 f = (FULL) *a - (FULL) *b - (FULL) u;
388 else
389 f = (FULL) *a - (FULL) u;
390 *a++ = (HALF) f;
392 while (len > 0 && *--a == 0)
393 len--;
394 zp->len = len;
395 zfree(tmp2);
397 zfree(tmp1);
398 while (len > 0 && zrel(*zp, *lastmod) >= 0) {
399 subcount++;
400 if (subcount > 2) {
401 math_error("Too many subtractions in zmod5");
402 /*NOTREACHED*/
404 j = modlen;
405 a = zp->v;
406 b = lastmod->v;
407 u = 0;
408 while (j-- > 0) {
409 f = (FULL) *a - (FULL) *b++ - (FULL) u;
410 *a++ = (HALF) f;
411 u = - (HALF) (f >> BASEB);
413 if (len > modlen) {
414 f = (FULL) *a - (FULL) u;
415 *a++ = (HALF) f;
417 while (len > 0 && *--a == 0)
418 len--;
419 zp->len = len;
421 if (len == 0)
422 zp->len = 1;
425 S_FUNC void
426 zmod6(ZVALUE z1, ZVALUE *res)
428 LEN len, modlen, len0;
429 int sign;
430 ZVALUE zp0, ztmp;
432 if (ziszero(z1) || zisone(*lastmod)) {
433 *res = _zero_;
434 return;
436 sign = z1.sign;
437 z1.sign = 0;
438 zcopy(z1, &ztmp);
439 modlen = lastmod->len;
440 zp0.sign = 0;
441 while (zrel(ztmp, *lastmod) >= 0) {
442 len = ztmp.len;
443 zp0.len = len;
444 len0 = 0;
445 if (len > 2 * modlen) {
446 zp0.len = 2 * modlen;
447 len0 = len - 2 * modlen;
449 zp0.v = ztmp.v + len - zp0.len;
450 zmod5(&zp0);
451 len = len0 + zp0.len;
452 while (len > 0 && ztmp.v[len - 1] == 0)
453 len--;
454 if (len == 0) {
455 zfree(ztmp);
456 *res = _zero_;
457 return;
459 ztmp.len = len;
461 if (sign)
462 zsub(*lastmod, ztmp, res);
463 else
464 zcopy(ztmp, res);
465 zfree(ztmp);
471 * Compute the result of raising one number to a power modulo another number.
472 * That is, this computes: a^b (modulo c).
473 * This calculates the result by examining the power POWBITS bits at a time,
474 * using a small table of POWNUMS low powers to calculate powers for those bits,
475 * and repeated squaring and multiplying by the partial powers to generate
476 * the complete power. If the power being raised to is high enough, then
477 * this uses the REDC algorithm to avoid doing many divisions. When using
478 * REDC, multiple calls to this routine using the same modulus will be
479 * slightly faster.
481 void
482 zpowermod(ZVALUE z1, ZVALUE z2, ZVALUE z3, ZVALUE *res)
484 HALF *hp; /* pointer to current word of the power */
485 REDC *rp; /* REDC information to be used */
486 ZVALUE *pp; /* pointer to low power table */
487 ZVALUE ans, temp; /* calculation values */
488 ZVALUE modpow; /* current small power */
489 ZVALUE lowpowers[POWNUMS]; /* low powers */
490 ZVALUE ztmp;
491 int curshift; /* shift value for word of power */
492 HALF curhalf; /* current word of power */
493 unsigned int curpow; /* current low power */
494 unsigned int curbit; /* current bit of low power */
495 int i;
497 if (zisneg(z3) || ziszero(z3)) {
498 math_error("Non-positive modulus in zpowermod");
499 /*NOTREACHED*/
501 if (zisneg(z2)) {
502 math_error("Negative power in zpowermod");
503 /*NOTREACHED*/
508 * Check easy cases first.
510 if ((ziszero(z1) && !ziszero(z2)) || zisunit(z3)) {
511 /* 0^(non_zero) or x^y mod 1 always produces zero */
512 *res = _zero_;
513 return;
515 if (ziszero(z2)) { /* x^0 == 1 */
516 *res = _one_;
517 return;
519 if (zistwo(z3)) { /* mod 2 */
520 if (zisodd(z1))
521 *res = _one_;
522 else
523 *res = _zero_;
524 return;
526 if (zisunit(z1) && (!z1.sign || ziseven(z2))) {
527 /* 1^x or (-1)^(2x) */
528 *res = _one_;
529 return;
533 * Normalize the number being raised to be non-negative and to lie
534 * within the modulo range. Then check for zero or one specially.
536 ztmp.len = 0;
537 if (zisneg(z1) || zrel(z1, z3) >= 0) {
538 zmod(z1, z3, &ztmp, 0);
539 z1 = ztmp;
541 if (ziszero(z1)) {
542 if (ztmp.len)
543 zfree(ztmp);
544 *res = _zero_;
545 return;
547 if (zisone(z1)) {
548 if (ztmp.len)
549 zfree(ztmp);
550 *res = _one_;
551 return;
555 * If modulus is large enough use zmod5
557 if (z3.len >= conf->pow2) {
558 if (havelastmod && zcmp(z3, *lastmod)) {
559 zfree(*lastmod);
560 zfree(*lastmodinv);
561 havelastmod = FALSE;
563 if (!havelastmod) {
564 zcopy(z3, lastmod);
565 zbitvalue(2 * z3.len * BASEB, &temp);
566 zquo(temp, z3, lastmodinv, 0);
567 zfree(temp);
568 havelastmod = TRUE;
571 /* zzz */
572 for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
573 pp->len = 0;
574 pp->v = NULL;
576 lowpowers[0] = _one_;
577 lowpowers[1] = z1;
578 ans = _one_;
580 hp = &z2.v[z2.len - 1];
581 curhalf = *hp;
582 curshift = BASEB - POWBITS;
583 while (curshift && ((curhalf >> curshift) == 0))
584 curshift -= POWBITS;
587 * Calculate the result by examining the power POWBITS bits at
588 * a time, and use the table of low powers at each iteration.
590 for (;;) {
591 curpow = (curhalf >> curshift) & (POWNUMS - 1);
592 pp = &lowpowers[curpow];
595 * If the small power is not yet saved in the table,
596 * then calculate it and remember it in the table for
597 * future use.
599 if (pp->v == NULL) {
600 if (curpow & 0x1)
601 zcopy(z1, &modpow);
602 else
603 modpow = _one_;
605 for (curbit = 0x2;
606 curbit <= curpow;
607 curbit *= 2) {
608 pp = &lowpowers[curbit];
609 if (pp->v == NULL) {
610 zsquare(lowpowers[curbit/2],
611 &temp);
612 zmod5(&temp);
613 zcopy(temp, pp);
614 zfree(temp);
616 if (curbit & curpow) {
617 zmul(*pp, modpow, &temp);
618 zfree(modpow);
619 zmod5(&temp);
620 zcopy(temp, &modpow);
621 zfree(temp);
624 pp = &lowpowers[curpow];
625 if (pp->v != NULL) {
626 zfree(*pp);
628 *pp = modpow;
632 * If the power is nonzero, then accumulate the small
633 * power into the result.
635 if (curpow) {
636 zmul(ans, *pp, &temp);
637 zfree(ans);
638 zmod5(&temp);
639 zcopy(temp, &ans);
640 zfree(temp);
644 * Select the next POWBITS bits of the power, if
645 * there is any more to generate.
647 curshift -= POWBITS;
648 if (curshift < 0) {
649 if (hp == z2.v)
650 break;
651 curhalf = *--hp;
652 curshift = BASEB - POWBITS;
656 * Square the result POWBITS times to make room for
657 * the next chunk of bits.
659 for (i = 0; i < POWBITS; i++) {
660 zsquare(ans, &temp);
661 zfree(ans);
662 zmod5(&temp);
663 zcopy(temp, &ans);
664 zfree(temp);
668 for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
669 if (pp->v != NULL)
670 freeh(pp->v);
672 *res = ans;
673 if (ztmp.len)
674 zfree(ztmp);
675 return;
679 * If the modulus is odd and small enough then use
680 * the REDC algorithm. The size where this is done is configurable.
682 if (z3.len < conf->redc2 && zisodd(z3)) {
683 if (powermodredc && zcmp(powermodredc->mod, z3)) {
684 zredcfree(powermodredc);
685 powermodredc = NULL;
687 if (powermodredc == NULL)
688 powermodredc = zredcalloc(z3);
689 rp = powermodredc;
690 zredcencode(rp, z1, &temp);
691 zredcpower(rp, temp, z2, &z1);
692 zfree(temp);
693 zredcdecode(rp, z1, res);
694 zfree(z1);
695 return;
699 * Modulus or power is small enough to perform the power raising
700 * directly. Initialize the table of powers.
702 for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
703 pp->len = 0;
704 pp->v = NULL;
706 lowpowers[0] = _one_;
707 lowpowers[1] = z1;
708 ans = _one_;
710 hp = &z2.v[z2.len - 1];
711 curhalf = *hp;
712 curshift = BASEB - POWBITS;
713 while (curshift && ((curhalf >> curshift) == 0))
714 curshift -= POWBITS;
717 * Calculate the result by examining the power POWBITS bits at a time,
718 * and use the table of low powers at each iteration.
720 for (;;) {
721 curpow = (curhalf >> curshift) & (POWNUMS - 1);
722 pp = &lowpowers[curpow];
725 * If the small power is not yet saved in the table, then
726 * calculate it and remember it in the table for future use.
728 if (pp->v == NULL) {
729 if (curpow & 0x1)
730 zcopy(z1, &modpow);
731 else
732 modpow = _one_;
734 for (curbit = 0x2; curbit <= curpow; curbit *= 2) {
735 pp = &lowpowers[curbit];
736 if (pp->v == NULL) {
737 zsquare(lowpowers[curbit/2], &temp);
738 zmod(temp, z3, pp, 0);
739 zfree(temp);
741 if (curbit & curpow) {
742 zmul(*pp, modpow, &temp);
743 zfree(modpow);
744 zmod(temp, z3, &modpow, 0);
745 zfree(temp);
748 pp = &lowpowers[curpow];
749 if (pp->v != NULL) {
750 zfree(*pp);
752 *pp = modpow;
756 * If the power is nonzero, then accumulate the small power
757 * into the result.
759 if (curpow) {
760 zmul(ans, *pp, &temp);
761 zfree(ans);
762 zmod(temp, z3, &ans, 0);
763 zfree(temp);
767 * Select the next POWBITS bits of the power, if there is
768 * any more to generate.
770 curshift -= POWBITS;
771 if (curshift < 0) {
772 if (hp-- == z2.v)
773 break;
774 curhalf = *hp;
775 curshift = BASEB - POWBITS;
779 * Square the result POWBITS times to make room for the next
780 * chunk of bits.
782 for (i = 0; i < POWBITS; i++) {
783 zsquare(ans, &temp);
784 zfree(ans);
785 zmod(temp, z3, &ans, 0);
786 zfree(temp);
790 for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
791 if (pp->v != NULL)
792 freeh(pp->v);
794 *res = ans;
795 if (ztmp.len)
796 zfree(ztmp);
800 * Given a positive odd N-word integer z, evaluate minv(-z, BASEB^N)
802 S_FUNC void
803 zredcmodinv(ZVALUE z, ZVALUE *res)
805 ZVALUE tmp;
806 HALF *a0, *a, *b;
807 HALF bit, h, inv, v;
808 FULL f;
809 LEN N, i, j, len;
811 N = z.len;
812 tmp.sign = 0;
813 tmp.len = N;
814 tmp.v = alloc(N);
815 zclearval(tmp);
816 *tmp.v = 1;
817 h = 1 + *z.v;
818 bit = 1;
819 inv = 1;
820 while (h) {
821 bit <<= 1;
822 if (bit & h) {
823 inv |= bit;
824 h += bit * *z.v;
828 j = N;
829 a0 = tmp.v;
830 while (j-- > 0) {
831 v = inv * *a0;
832 i = j;
833 a = a0;
834 b = z.v;
835 f = (FULL) v * (FULL) *b++ + (FULL) *a++;
836 *a0 = v;
837 while (i-- > 0) {
838 f = (FULL) v * (FULL) *b++ + (FULL) *a + (f >> BASEB);
839 *a++ = (HALF) f;
841 while (j > 0 && *++a0 == 0)
842 j--;
844 a = tmp.v + N;
845 len = N;
846 while (*--a == 0)
847 len--;
848 tmp.len = len;
849 zcopy(tmp, res);
850 zfree(tmp);
855 * Initialize the REDC algorithm for a particular modulus,
856 * returning a pointer to a structure that is used for other
857 * REDC calls. An error is generated if the structure cannot
858 * be allocated. The modulus must be odd and positive.
860 * given:
861 * z1 modulus to initialize for
863 REDC *
864 zredcalloc(ZVALUE z1)
866 REDC *rp; /* REDC information */
867 ZVALUE tmp;
868 long bit;
870 if (ziseven(z1) || zisneg(z1)) {
871 math_error("REDC requires positive odd modulus");
872 /*NOTREACHED*/
875 rp = (REDC *) malloc(sizeof(REDC));
876 if (rp == NULL) {
877 math_error("Cannot allocate REDC structure");
878 /*NOTREACHED*/
882 * Round up the binary modulus to the next power of two
883 * which is at a word boundary. Then the shift and modulo
884 * operations mod the binary modulus can be done very cheaply.
885 * Calculate the REDC format for the number 1 for future use.
887 zcopy(z1, &rp->mod);
888 zredcmodinv(z1, &rp->inv);
889 bit = zhighbit(z1) + 1;
890 if (bit % BASEB)
891 bit += (BASEB - (bit % BASEB));
892 zbitvalue(bit, &tmp);
893 zmod(tmp, rp->mod, &rp->one, 0);
894 zfree(tmp);
895 rp->len = (LEN)(bit / BASEB);
896 return rp;
901 * Free any numbers associated with the specified REDC structure,
902 * and then the REDC structure itself.
904 * given:
905 * rp REDC information to be cleared
907 void
908 zredcfree(REDC *rp)
910 zfree(rp->mod);
911 zfree(rp->inv);
912 zfree(rp->one);
913 free(rp);
918 * Convert a normal number into the specified REDC format.
919 * The number to be converted can be negative or out of modulo range.
920 * The resulting number can be used for multiplying, adding, subtracting,
921 * or comparing with any other such converted numbers, as if the numbers
922 * were being calculated modulo the number which initialized the REDC
923 * information. When the final value is unconverted, the result is the
924 * same as if the usual operations were done with the original numbers.
926 * given:
927 * rp REDC information
928 * z1 number to be converted
929 * res returned converted number
931 void
932 zredcencode(REDC *rp, ZVALUE z1, ZVALUE *res)
934 ZVALUE tmp1;
937 * Confirm or initialize lastmod information when modulus is a
938 * big number.
941 if (rp->len >= conf->pow2) {
942 if (havelastmod && zcmp(rp->mod, *lastmod)) {
943 zfree(*lastmod);
944 zfree(*lastmodinv);
945 havelastmod = FALSE;
947 if (!havelastmod) {
948 zcopy(rp->mod, lastmod);
949 zbitvalue(2 * rp->len * BASEB, &tmp1);
950 zquo(tmp1, rp->mod, lastmodinv, 0);
951 zfree(tmp1);
952 havelastmod = TRUE;
956 * Handle the cases 0, 1, -1, and 2 specially since these are
957 * easy to calculate. Zero transforms to zero, and the others
958 * can be obtained from the precomputed REDC format for 1 since
959 * addition and subtraction act normally for REDC format numbers.
961 if (ziszero(z1)) {
962 *res = _zero_;
963 return;
965 if (zisone(z1)) {
966 zcopy(rp->one, res);
967 return;
969 if (zisunit(z1)) {
970 zsub(rp->mod, rp->one, res);
971 return;
973 if (zistwo(z1)) {
974 zadd(rp->one, rp->one, &tmp1);
975 if (zrel(tmp1, rp->mod) < 0) {
976 *res = tmp1;
977 return;
979 zsub(tmp1, rp->mod, res);
980 zfree(tmp1);
981 return;
985 * Not a trivial number to convert, so do the full transformation.
987 zshift(z1, rp->len * BASEB, &tmp1);
988 if (rp->len < conf->pow2)
989 zmod(tmp1, rp->mod, res, 0);
990 else
991 zmod6(tmp1, res);
992 zfree(tmp1);
997 * The REDC algorithm used to convert numbers out of REDC format and also
998 * used after multiplication of two REDC numbers. Using this routine
999 * avoids any divides, replacing the divide by two multiplications.
1000 * If the numbers are very large, then these two multiplies will be
1001 * quicker than the divide, since dividing is harder than multiplying.
1003 * given:
1004 * rp REDC information
1005 * z1 number to be transformed
1006 * res returned transformed number
1008 void
1009 zredcdecode(REDC *rp, ZVALUE z1, ZVALUE *res)
1011 ZVALUE tmp1, tmp2;
1012 ZVALUE ztmp;
1013 ZVALUE ztop;
1014 ZVALUE zp1;
1015 FULL muln;
1016 HALF *h1;
1017 HALF *h3;
1018 HALF *hd = NULL;
1019 HALF Ninv;
1020 LEN modlen;
1021 LEN len;
1022 FULL f;
1023 int sign;
1024 int i, j;
1027 * Check first for the special values for 0 and 1 that are easy.
1029 if (ziszero(z1)) {
1030 *res = _zero_;
1031 return;
1033 if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
1034 (zcmp(z1, rp->one) == 0)) {
1035 *res = _one_;
1036 return;
1038 ztop.len = 0;
1039 ztmp.len = 0;
1040 modlen = rp->len;
1041 sign = z1.sign;
1042 z1.sign = 0;
1043 if (z1.len > modlen) {
1044 ztop.v = z1.v + modlen;
1045 ztop.len = z1.len - modlen;
1046 ztop.sign = 0;
1047 if (zrel(ztop, rp->mod) >= 0) {
1048 zmod(ztop, rp->mod, &ztmp, 0);
1049 ztop = ztmp;
1051 len = modlen;
1052 h1 = z1.v + len;
1053 while (len > 0 && *--h1 == 0)
1054 len--;
1055 if (len == 0) {
1056 if (ztmp.len)
1057 *res = ztmp;
1058 else
1059 zcopy(ztop, res);
1060 return;
1062 z1.len = len;
1065 if (rp->mod.len < conf->pow2) {
1066 Ninv = rp->inv.v[0];
1067 res->sign = 0;
1068 res->len = modlen;
1069 res->v = alloc(modlen);
1070 zclearval(*res);
1071 h1 = z1.v;
1072 for (i = 0; i < modlen; i++) {
1073 h3 = rp->mod.v;
1074 hd = res->v;
1075 f = (FULL) *hd++;
1076 if (i < z1.len)
1077 f += (FULL) *h1++;
1078 muln = (HALF) ((f & BASE1) * Ninv);
1079 f = ((muln * (FULL) *h3++) + f) >> BASEB;
1080 j = modlen;
1081 while (--j > 0) {
1082 f += (muln * (FULL) *h3++) + (FULL) *hd;
1083 hd[-1] = (HALF) f;
1084 f >>= BASEB;
1085 hd++;
1087 hd[-1] = (HALF) f;
1089 len = modlen;
1090 while (*--hd == 0 && len > 1)
1091 len--;
1092 if (len == 0)
1093 len = 1;
1094 res->len = len;
1095 } else {
1096 /* Here 0 < z1 < 2^bitnum */
1099 * First calculate the following:
1100 * tmp2 = ((z1 * inv) % 2^bitnum.
1101 * The mod operations can be done with no work since the bit
1102 * number was selected as a multiple of the word size. Just
1103 * reduce the sizes of the numbers as required.
1105 zmul(z1, rp->inv, &tmp2);
1106 if (tmp2.len > modlen) {
1107 h1 = tmp2.v + modlen;
1108 len = modlen;
1109 while (len > 0 && *--h1 == 0)
1110 len--;
1111 tmp2.len = len;
1115 * Next calculate the following:
1116 * res = (z1 + tmp2 * modulus) / 2^bitnum
1117 * Since 0 < z1 < 2^bitnum and the division is always exact,
1118 * the quotient can be evaluated by rounding up
1119 * (tmp2 * modulus)/2^bitnum. This can be achieved by defining
1120 * zp1 by an appropriate shift and then adding one.
1122 zmul(tmp2, rp->mod, &tmp1);
1123 zfree(tmp2);
1124 if (tmp1.len > modlen) {
1125 zp1.v = tmp1.v + modlen;
1126 zp1.len = tmp1.len - modlen;
1127 zp1.sign = 0;
1128 zadd(zp1, _one_, res);
1129 } else {
1130 *res = _one_;
1132 zfree(tmp1);
1134 if (ztop.len) {
1135 zadd(*res, ztop, &tmp1);
1136 zfree(*res);
1137 if (ztmp.len)
1138 zfree(ztmp);
1139 *res = tmp1;
1143 * Finally do a final modulo by a simple subtraction if necessary.
1144 * This is all that is needed because the previous calculation is
1145 * guaranteed to always be less than twice the modulus.
1148 if (zrel(*res, rp->mod) >= 0) {
1149 zsub(*res, rp->mod, &tmp1);
1150 zfree(*res);
1151 *res = tmp1;
1153 if (sign && !ziszero(*res)) {
1154 zsub(rp->mod, *res, &tmp1);
1155 zfree(*res);
1156 *res = tmp1;
1158 return;
1163 * Multiply two numbers in REDC format together producing a result also
1164 * in REDC format. If the result is converted back to a normal number,
1165 * then the result is the same as the modulo'd multiplication of the
1166 * original numbers before they were converted to REDC format. This
1167 * calculation is done in one of two ways, depending on the size of the
1168 * modulus. For large numbers, the REDC definition is used directly
1169 * which involves three multiplies overall. For small numbers, a
1170 * complicated routine is used which does the indicated multiplication
1171 * and the REDC algorithm at the same time to produce the result.
1173 * given:
1174 * rp REDC information
1175 * z1 first REDC number to be multiplied
1176 * z2 second REDC number to be multiplied
1177 * res resulting REDC number
1179 void
1180 zredcmul(REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res)
1182 FULL mulb;
1183 FULL muln;
1184 HALF *h1;
1185 HALF *h2;
1186 HALF *h3;
1187 HALF *hd;
1188 HALF Ninv;
1189 HALF topdigit = 0;
1190 LEN modlen;
1191 LEN len;
1192 LEN len2;
1193 SIUNION sival1;
1194 SIUNION sival2;
1195 SIUNION carry;
1196 ZVALUE tmp;
1197 ZVALUE z1tmp, z2tmp;
1198 int sign;
1200 sign = z1.sign ^ z2.sign;
1201 z1.sign = 0;
1202 z2.sign = 0;
1203 z1tmp.len = 0;
1204 if (zrel(z1, rp->mod) >= 0) {
1205 zmod(z1, rp->mod, &z1tmp, 0);
1206 z1 = z1tmp;
1208 z2tmp.len = 0;
1209 if (zrel(z2, rp->mod) >= 0) {
1210 zmod(z2, rp->mod, &z2tmp, 0);
1211 z2 = z2tmp;
1216 * Check for special values which we easily know the answer.
1218 if (ziszero(z1) || ziszero(z2)) {
1219 *res = _zero_;
1220 if (z1tmp.len)
1221 zfree(z1tmp);
1222 if (z2tmp.len)
1223 zfree(z2tmp);
1224 return;
1227 if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
1228 (zcmp(z1, rp->one) == 0)) {
1229 if (sign)
1230 zsub(rp->mod, z2, res);
1231 else
1232 zcopy(z2, res);
1233 if (z1tmp.len)
1234 zfree(z1tmp);
1235 if (z2tmp.len)
1236 zfree(z2tmp);
1237 return;
1240 if ((z2.len == rp->one.len) && (z2.v[0] == rp->one.v[0]) &&
1241 (zcmp(z2, rp->one) == 0)) {
1242 if (sign)
1243 zsub(rp->mod, z1, res);
1244 else
1245 zcopy(z1, res);
1246 if (z1tmp.len)
1247 zfree(z1tmp);
1248 if (z2tmp.len)
1249 zfree(z2tmp);
1250 return;
1254 * If the size of the modulus is large, then just do the multiply,
1255 * followed by the two multiplies contained in the REDC routine.
1256 * This will be quicker than directly doing the REDC calculation
1257 * because of the O(N^1.585) speed of the multiplies. The size
1258 * of the number which this is done is configurable.
1260 if (rp->mod.len >= conf->redc2) {
1261 zmul(z1, z2, &tmp);
1262 zredcdecode(rp, tmp, res);
1263 zfree(tmp);
1264 if (sign && !ziszero(*res)) {
1265 zsub(rp->mod, *res, &tmp);
1266 zfree(*res);
1267 *res = tmp;
1269 if (z1tmp.len)
1270 zfree(z1tmp);
1271 if (z2tmp.len)
1272 zfree(z2tmp);
1273 return;
1277 * The number is small enough to calculate by doing the O(N^2) REDC
1278 * algorithm directly. This algorithm performs the multiplication and
1279 * the reduction at the same time. Notice the obscure facts that
1280 * only the lowest word of the inverse value is used, and that
1281 * there is no shifting of the partial products as there is in a
1282 * normal multiply.
1284 modlen = rp->mod.len;
1285 Ninv = rp->inv.v[0];
1288 * Allocate the result and clear it.
1289 * The size of the result will be equal to or smaller than
1290 * the modulus size.
1292 res->sign = 0;
1293 res->len = modlen;
1294 res->v = alloc(modlen);
1296 hd = res->v;
1297 len = modlen;
1298 zclearval(*res);
1301 * Do this outermost loop over all the digits of z1.
1303 h1 = z1.v;
1304 len = z1.len;
1305 while (len--) {
1307 * Start off with the next digit of z1, the first
1308 * digit of z2, and the first digit of the modulus.
1310 mulb = (FULL) *h1++;
1311 h2 = z2.v;
1312 h3 = rp->mod.v;
1313 hd = res->v;
1314 sival1.ivalue = mulb * ((FULL) *h2++) + ((FULL) *hd++);
1315 muln = ((HALF) (sival1.silow * Ninv));
1316 sival2.ivalue = muln * ((FULL) *h3++) + ((FULL) sival1.silow);
1317 carry.ivalue = ((FULL) sival1.sihigh) + ((FULL) sival2.sihigh);
1320 * Do this innermost loop for each digit of z2, except
1321 * for the first digit which was just done above.
1323 len2 = z2.len;
1324 while (--len2 > 0) {
1325 sival1.ivalue = mulb * ((FULL) *h2++)
1326 + ((FULL) *hd) + ((FULL) carry.silow);
1327 sival2.ivalue = muln * ((FULL) *h3++)
1328 + ((FULL) sival1.silow);
1329 carry.ivalue = ((FULL) sival1.sihigh)
1330 + ((FULL) sival2.sihigh)
1331 + ((FULL) carry.sihigh);
1333 hd[-1] = sival2.silow;
1334 hd++;
1338 * Now continue the loop as necessary so the total number
1339 * of iterations is equal to the size of the modulus.
1340 * This acts as if the innermost loop was repeated for
1341 * high digits of z2 that are zero.
1343 len2 = modlen - z2.len;
1344 while (len2--) {
1345 sival2.ivalue = muln * ((FULL) *h3++)
1346 + ((FULL) *hd)
1347 + ((FULL) carry.silow);
1348 carry.ivalue = ((FULL) sival2.sihigh)
1349 + ((FULL) carry.sihigh);
1351 hd[-1] = sival2.silow;
1352 hd++;
1355 carry.ivalue += topdigit;
1356 hd[-1] = carry.silow;
1357 topdigit = carry.sihigh;
1361 * Now continue the loop as necessary so the total number
1362 * of iterations is equal to the size of the modulus.
1363 * This acts as if the outermost loop was repeated for high
1364 * digits of z1 that are zero.
1366 len = modlen - z1.len;
1367 while (len--) {
1369 * Start off with the first digit of the modulus.
1371 h3 = rp->mod.v;
1372 hd = res->v;
1373 muln = ((HALF) (*hd * Ninv));
1374 sival2.ivalue = muln * ((FULL) *h3++) + (FULL) *hd++;
1375 carry.ivalue = ((FULL) sival2.sihigh);
1378 * Do this innermost loop for each digit of the modulus,
1379 * except for the first digit which was just done above.
1381 len2 = modlen;
1382 while (--len2 > 0) {
1383 sival2.ivalue = muln * ((FULL) *h3++)
1384 + ((FULL) *hd) + ((FULL) carry.silow);
1385 carry.ivalue = ((FULL) sival2.sihigh)
1386 + ((FULL) carry.sihigh);
1388 hd[-1] = sival2.silow;
1389 hd++;
1391 carry.ivalue += topdigit;
1392 hd[-1] = carry.silow;
1393 topdigit = carry.sihigh;
1397 * Determine the true size of the result, taking the top digit of
1398 * the current result into account. The top digit is not stored in
1399 * the number because it is temporary and would become zero anyway
1400 * after the final subtraction is done.
1402 if (topdigit == 0) {
1403 len = modlen;
1404 while (*--hd == 0 && len > 1) {
1405 len--;
1407 res->len = len;
1410 * Compare the result with the modulus.
1411 * If it is less than the modulus, then the calculation is complete.
1414 if (zrel(*res, rp->mod) < 0) {
1415 if (z1tmp.len)
1416 zfree(z1tmp);
1417 if (z2tmp.len)
1418 zfree(z2tmp);
1419 if (sign && !ziszero(*res)) {
1420 zsub(rp->mod, *res, &tmp);
1421 zfree(*res);
1422 *res = tmp;
1424 return;
1429 * Do a subtraction to reduce the result to a value less than
1430 * the modulus. The REDC algorithm guarantees that a single subtract
1431 * is all that is needed. Ignore any borrowing from the possible
1432 * highest word of the current result because that would affect
1433 * only the top digit value that was not stored and would become
1434 * zero anyway.
1436 carry.ivalue = 0;
1437 h1 = rp->mod.v;
1438 hd = res->v;
1439 len = modlen;
1440 while (len--) {
1441 carry.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++)
1442 + ((FULL) carry.silow);
1443 *hd++ = (HALF)(BASE1 - carry.silow);
1444 carry.silow = carry.sihigh;
1448 * Now finally recompute the size of the result.
1450 len = modlen;
1451 hd = &res->v[len - 1];
1452 while ((*hd == 0) && (len > 1)) {
1453 hd--;
1454 len--;
1456 res->len = len;
1457 if (z1tmp.len)
1458 zfree(z1tmp);
1459 if (z2tmp.len)
1460 zfree(z2tmp);
1461 if (sign && !ziszero(*res)) {
1462 zsub(rp->mod, *res, &tmp);
1463 zfree(*res);
1464 *res = tmp;
1470 * Square a number in REDC format producing a result also in REDC format.
1472 * given:
1473 * rp REDC information
1474 * z1 REDC number to be squared
1475 * res resulting REDC number
1477 void
1478 zredcsquare(REDC *rp, ZVALUE z1, ZVALUE *res)
1480 FULL mulb;
1481 FULL muln;
1482 HALF *h1;
1483 HALF *h2;
1484 HALF *h3;
1485 HALF *hd = NULL;
1486 HALF Ninv;
1487 HALF topdigit = 0;
1488 LEN modlen;
1489 LEN len;
1490 SIUNION sival1;
1491 SIUNION sival2;
1492 SIUNION sival3;
1493 SIUNION carry;
1494 ZVALUE tmp, ztmp;
1495 FULL f;
1496 int i, j;
1498 ztmp.len = 0;
1499 z1.sign = 0;
1500 if (zrel(z1, rp->mod) >= 0) {
1501 zmod(z1, rp->mod, &ztmp, 0);
1502 z1 = ztmp;
1504 if (ziszero(z1)) {
1505 *res = _zero_;
1506 if (ztmp.len)
1507 zfree(ztmp);
1508 return;
1510 if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
1511 (zcmp(z1, rp->one) == 0)) {
1512 zcopy(z1, res);
1513 if (ztmp.len)
1514 zfree(ztmp);
1515 return;
1520 * If the modulus is small enough, then call the multiply
1521 * routine to produce the result. Otherwise call the O(N^1.585)
1522 * routines to get the answer.
1524 if (rp->mod.len >= conf->redc2
1525 || 3 * z1.len < 2 * rp->mod.len) {
1526 zsquare(z1, &tmp);
1527 zredcdecode(rp, tmp, res);
1528 zfree(tmp);
1529 if (ztmp.len)
1530 zfree(ztmp);
1531 return;
1533 modlen = rp->mod.len;
1534 Ninv = rp->inv.v[0];
1536 res->sign = 0;
1537 res->len = modlen;
1538 res->v = alloc(modlen);
1540 zclearval(*res);
1542 h1 = z1.v;
1544 for (i = 0; i < z1.len; i++) {
1545 mulb = (FULL) *h1++;
1546 h2 = h1;
1547 h3 = rp->mod.v;
1548 hd = res->v;
1549 if (i == 0) {
1550 sival1.ivalue = mulb * mulb;
1551 muln = (HALF) (sival1.silow * Ninv);
1552 sival2.ivalue = muln * ((FULL) *h3++)
1553 + (FULL) sival1.silow;
1554 carry.ivalue = (FULL) sival1.sihigh
1555 + (FULL) sival2.sihigh;
1556 hd++;
1557 } else {
1558 muln = (HALF) (*hd * Ninv);
1559 f = (muln * ((FULL) *h3++) + (FULL) *hd++) >> BASEB;
1560 j = i;
1561 while (--j > 0) {
1562 f += muln * ((FULL) *h3++) + *hd;
1563 hd[-1] = (HALF) f;
1564 f >>= BASEB;
1565 hd++;
1567 carry.ivalue = f;
1568 sival1.ivalue = mulb * mulb + (FULL) carry.silow;
1569 sival2.ivalue = muln * ((FULL) *h3++)
1570 + (FULL) *hd
1571 + (FULL) sival1.silow;
1572 carry.ivalue = (FULL) sival1.sihigh
1573 + (FULL) sival2.sihigh
1574 + (FULL) carry.sihigh;
1575 hd[-1] = sival2.silow;
1576 hd++;
1578 j = z1.len - i;
1579 while (--j > 0) {
1580 sival1.ivalue = mulb * ((FULL) *h2++);
1581 sival2.ivalue = ((FULL) sival1.silow << 1)
1582 + muln * ((FULL) *h3++);
1583 sival3.ivalue = (FULL) sival2.silow
1584 + (FULL) *hd
1585 + (FULL) carry.silow;
1586 carry.ivalue = ((FULL) sival1.sihigh << 1)
1587 + (FULL) sival2.sihigh
1588 + (FULL) sival3.sihigh
1589 + (FULL) carry.sihigh;
1590 hd[-1] = sival3.silow;
1591 hd++;
1593 j = modlen - z1.len;
1594 while (j-- > 0) {
1595 sival1.ivalue = muln * ((FULL) *h3++)
1596 + (FULL) *hd
1597 + (FULL) carry.silow;
1598 carry.ivalue = (FULL) sival1.sihigh
1599 + (FULL) carry.sihigh;
1600 hd[-1] = sival1.silow;
1601 hd++;
1603 carry.ivalue += (FULL) topdigit;
1604 hd[-1] = carry.silow;
1605 topdigit = carry.sihigh;
1607 i = modlen - z1.len;
1608 while (i-- > 0) {
1609 h3 = rp->mod.v;
1610 hd = res->v;
1611 muln = (HALF) (*hd * Ninv);
1612 sival1.ivalue = muln * ((FULL) *h3++) + (FULL) *hd++;
1613 carry.ivalue = (FULL) sival1.sihigh;
1614 j = modlen;
1615 while (--j > 0) {
1616 sival1.ivalue = muln * ((FULL) *h3++)
1617 + (FULL) *hd
1618 + (FULL) carry.silow;
1619 carry.ivalue = (FULL) sival1.sihigh
1620 + (FULL) carry.sihigh;
1621 hd[-1] = sival1.silow;
1622 hd++;
1624 carry.ivalue += (FULL) topdigit;
1625 hd[-1] = carry.silow;
1626 topdigit = carry.sihigh;
1628 if (topdigit == 0) {
1629 len = modlen;
1630 while (*--hd == 0 && len > 1) {
1631 len--;
1633 res->len = len;
1634 if (zrel(*res, rp->mod) < 0) {
1635 if (ztmp.len)
1636 zfree(ztmp);
1637 return;
1641 carry.ivalue = 0;
1642 h1 = rp->mod.v;
1643 hd = res->v;
1644 len = modlen;
1645 while (len--) {
1646 carry.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++)
1647 + ((FULL) carry.silow);
1648 *hd++ = (HALF)(BASE1 - carry.silow);
1649 carry.silow = carry.sihigh;
1652 len = modlen;
1653 hd = &res->v[len - 1];
1654 while ((*hd == 0) && (len > 1)) {
1655 hd--;
1656 len--;
1658 res->len = len;
1659 if (ztmp.len)
1660 zfree(ztmp);
1665 * Compute the result of raising a REDC format number to a power.
1666 * The result is within the range 0 to the modulus - 1.
1667 * This calculates the result by examining the power POWBITS bits at a time,
1668 * using a small table of POWNUMS low powers to calculate powers for those bits,
1669 * and repeated squaring and multiplying by the partial powers to generate
1670 * the complete power.
1672 * given:
1673 * rp REDC information
1674 * z1 REDC number to be raised
1675 * z2 normal number to raise number to
1676 * res result
1678 void
1679 zredcpower(REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res)
1681 HALF *hp; /* pointer to current word of the power */
1682 ZVALUE *pp; /* pointer to low power table */
1683 ZVALUE ans, temp; /* calculation values */
1684 ZVALUE ztmp;
1685 ZVALUE modpow; /* current small power */
1686 ZVALUE lowpowers[POWNUMS]; /* low powers */
1687 int curshift; /* shift value for word of power */
1688 HALF curhalf; /* current word of power */
1689 unsigned int curpow; /* current low power */
1690 unsigned int curbit; /* current bit of low power */
1691 int sign;
1692 int i;
1694 if (zisneg(z2)) {
1695 math_error("Negative power in zredcpower");
1696 /*NOTREACHED*/
1699 if (zisunit(rp->mod)) {
1700 *res = _zero_;
1701 return;
1704 sign = zisodd(z2) ? z1.sign : 0;
1705 z1.sign = 0;
1706 ztmp.len = 0;
1707 if (zrel(z1, rp->mod) >= 0) {
1708 zmod(z1, rp->mod, &ztmp, 0);
1709 z1 = ztmp;
1712 * Check for zero or the REDC format for one.
1714 if (ziszero(z1)) {
1715 if (ziszero(z2))
1716 *res = _one_;
1717 else
1718 *res = _zero_;
1719 if (ztmp.len)
1720 zfree(ztmp);
1721 return;
1723 if (zcmp(z1, rp->one) == 0) {
1724 if (sign)
1725 zsub(rp->mod, rp->one, res);
1726 else
1727 zcopy(rp->one, res);
1728 if (ztmp.len)
1729 zfree(ztmp);
1730 return;
1734 * See if the number being raised is the REDC format for -1.
1735 * If so, then the answer is the REDC format for one or minus one.
1736 * To do this check, calculate the REDC format for -1.
1738 if (((HALF)(z1.v[0] + rp->one.v[0])) == rp->mod.v[0]) {
1739 zsub(rp->mod, rp->one, &temp);
1740 if (zcmp(z1, temp) == 0) {
1741 if (zisodd(z2) ^ sign) {
1742 *res = temp;
1743 if (ztmp.len)
1744 zfree(ztmp);
1745 return;
1747 zfree(temp);
1748 zcopy(rp->one, res);
1749 if (ztmp.len)
1750 zfree(ztmp);
1751 return;
1753 zfree(temp);
1756 for (pp = &lowpowers[2]; pp < &lowpowers[POWNUMS]; pp++)
1757 pp->len = 0;
1758 zcopy(rp->one, &lowpowers[0]);
1759 zcopy(z1, &lowpowers[1]);
1760 zcopy(rp->one, &ans);
1762 hp = &z2.v[z2.len - 1];
1763 curhalf = *hp;
1764 curshift = BASEB - POWBITS;
1765 while (curshift && ((curhalf >> curshift) == 0))
1766 curshift -= POWBITS;
1769 * Calculate the result by examining the power POWBITS bits at a time,
1770 * and use the table of low powers at each iteration.
1772 for (;;) {
1773 curpow = (curhalf >> curshift) & (POWNUMS - 1);
1774 pp = &lowpowers[curpow];
1777 * If the small power is not yet saved in the table, then
1778 * calculate it and remember it in the table for future use.
1780 if (pp->len == 0) {
1781 if (curpow & 0x1)
1782 zcopy(z1, &modpow);
1783 else
1784 zcopy(rp->one, &modpow);
1786 for (curbit = 0x2; curbit <= curpow; curbit *= 2) {
1787 pp = &lowpowers[curbit];
1788 if (pp->len == 0)
1789 zredcsquare(rp, lowpowers[curbit/2],
1790 pp);
1791 if (curbit & curpow) {
1792 zredcmul(rp, *pp, modpow, &temp);
1793 zfree(modpow);
1794 modpow = temp;
1797 pp = &lowpowers[curpow];
1798 if (pp->len > 0) {
1799 zfree(*pp);
1801 *pp = modpow;
1805 * If the power is nonzero, then accumulate the small power
1806 * into the result.
1808 if (curpow) {
1809 zredcmul(rp, ans, *pp, &temp);
1810 zfree(ans);
1811 ans = temp;
1815 * Select the next POWBITS bits of the power, if there is
1816 * any more to generate.
1818 curshift -= POWBITS;
1819 if (curshift < 0) {
1820 if (hp-- == z2.v)
1821 break;
1822 curhalf = *hp;
1823 curshift = BASEB - POWBITS;
1827 * Square the result POWBITS times to make room for the next
1828 * chunk of bits.
1830 for (i = 0; i < POWBITS; i++) {
1831 zredcsquare(rp, ans, &temp);
1832 zfree(ans);
1833 ans = temp;
1837 for (pp = lowpowers; pp < &lowpowers[POWNUMS]; pp++) {
1838 if (pp->len)
1839 freeh(pp->v);
1841 if (sign && !ziszero(ans)) {
1842 zsub(rp->mod, ans, res);
1843 zfree(ans);
1844 } else {
1845 *res = ans;
1847 if (ztmp.len)
1848 zfree(ztmp);
1853 * zhnrmod - compute z mod h*2^n+r
1855 * We compute v mod h*2^n+r, where h>0, n>0, abs(r) <= 1, as follows:
1857 * Let v = b*2^n + a, where 0 <= a < 2^n
1859 * Now v mod h*2^n+r == b*2^n + a mod h*2^n+r,
1860 * and thus v mod h*2^n+r == b*2^n mod h*2^n+r + a mod h*2^n+r.
1862 * Because 0 <= a < 2^n < h*2^n+r, a mod h*2^n+r == a.
1863 * Thus v mod h*2^n+r == b*2^n mod h*2^n+r + a.
1865 * It can be shown that b*2^n mod h*2^n == 2^n * (b mod h).
1867 * Thus for r == 0, v mod h*2^n+r == (2^n)*(b mod h) + a.
1869 * It can be shown that v mod 2^n-1 == a+b mod 2^n-1.
1871 * Thus for r == -1, v mod h*2^n+r == (2^n)*(b mod h) + a + int(b/h).
1873 * It can be shown that v mod 2^n+1 == a-b mod 2^n+1.
1875 * Thus for r == +1, v mod h*2^n+r == (2^n)*(b mod h) + a - int(b/h).
1877 * Therefore, v mod h*2^n+r == (2^n)*(b mod h) + a - r*int(b/h).
1879 * The above proof leads to the following calc resource file which computes
1880 * the value z mod h*2^n+r:
1882 * define hnrmod(v,h,n,r)
1884 * local a,b,modulus,tquo,tmod,lbit,ret;
1886 * if (!isint(h) || h < 1) {
1887 * quit "h must be an integer be > 0";
1889 * if (!isint(n) || n < 1) {
1890 * quit "n must be an integer be > 0";
1892 * if (r != 1 && r != 0 && r != -1) {
1893 * quit "r must be -1, 0 or 1";
1896 * lbit = lowbit(h);
1897 * if (lbit > 0) {
1898 * n += lbit;
1899 * h >>= lbit;
1902 * modulus = h<<n+r;
1903 * if (modulus <= 2^31-1) {
1904 * return v % modulus;
1906 * ret = v;
1908 * do {
1909 * if (highbit(ret) < n) {
1910 * break;
1912 * b = ret>>n;
1913 * a = ret - (b<<n);
1915 * switch (r) {
1916 * case -1:
1917 * if (h == 1) {
1918 * ret = a + b;
1919 * } else {
1920 * quomod(b, h, tquo, tmod);
1921 * ret = tmod<<n + a + tquo;
1923 * break;
1924 * case 0:
1925 * if (h == 1) {
1926 * ret = a;
1927 * } else {
1928 * ret = (b%h)<<n + a;
1930 * break;
1931 * case 1:
1932 * if (h == 1) {
1933 * ret = ((a > b) ? a-b : modulus+a-b);
1934 * } else {
1935 * quomod(b, h, tquo, tmod);
1936 * tmod = tmod<<n + a;
1937 * ret = ((tmod >= tquo) ? tmod-tquo : modulus+tmod-tquo);
1939 * break;
1941 * } while (ret > modulus);
1942 * ret = ((ret < 0) ? ret+modlus : ((ret == modulus) ? 0 : ret));
1944 * return ret;
1947 * This function implements the above calc resource file.
1949 * given:
1950 * v take mod of this value, v >= 0
1951 * zh h from modulus h*2^n+r, h > 0
1952 * zn n from modulus h*2^n+r, n > 0
1953 * zr r from modulus h*2^n+r, abs(r) <= 1
1954 * res v mod h*2^n+r
1956 void
1957 zhnrmod(ZVALUE v, ZVALUE zh, ZVALUE zn, ZVALUE zr, ZVALUE *res)
1959 ZVALUE a; /* lower n bits of v */
1960 ZVALUE b; /* bits above the lower n bits of v */
1961 ZVALUE h; /* working zh value */
1962 ZVALUE modulus; /* h^2^n + r */
1963 ZVALUE tquo; /* b // h */
1964 ZVALUE tmod; /* b % h or (b%h)<<n + a */
1965 ZVALUE t; /* temp ZVALUE */
1966 ZVALUE t2; /* temp ZVALUE */
1967 ZVALUE ret; /* return value, what *res is set to */
1968 long n; /* integer value of zn */
1969 long r; /* integer value of zr */
1970 long hbit; /* highbit(res) */
1971 long lbit; /* lowbit(h) */
1972 int zrelval; /* return value of zrel() */
1973 int hisone; /* 1 => h == 1, 0 => h != 1 */
1976 * firewall
1978 if (zisneg(zh) || ziszero(zh)) {
1979 math_error("h must be > 0");
1980 /*NOTREACHED*/
1982 if (zisneg(zn) || ziszero(zn)) {
1983 math_error("n must be > 0");
1984 /*NOTREACHED*/
1986 if (zge31b(zn)) {
1987 math_error("n must be < 2^31");
1988 /*NOTREACHED*/
1990 if (!zisabsleone(zr)) {
1991 math_error("r must be -1, 0 or 1");
1992 /*NOTREACHED*/
1997 * setup for loop
1999 n = ztolong(zn);
2000 r = ztolong(zr);
2001 if (zisneg(zr)) {
2002 r = -r;
2004 /* lbit = lowbit(h); */
2005 lbit = zlowbit(zh);
2006 /* if (lbit > 0) { n += lbit; h >>= lbit; } */
2007 if (lbit > 0) {
2008 n += lbit;
2009 zshift(zh, -lbit, &h);
2010 } else {
2011 h = zh;
2013 /* modulus = h<<n+r; */
2014 zshift(h, n, &t);
2015 switch (r) {
2016 case 1:
2017 zadd(t, _one_, &modulus);
2018 zfree(t);
2019 break;
2020 case 0:
2021 modulus = t;
2022 break;
2023 case -1:
2024 zsub(t, _one_, &modulus);
2025 zfree(t);
2026 break;
2028 /* if (modulus <= MAXLONG) { return v % modulus; } */
2029 if (!zgtmaxlong(modulus)) {
2030 itoz(zmodi(v, ztolong(modulus)), res);
2031 zfree(modulus);
2032 if (lbit > 0) {
2033 zfree(h);
2035 return;
2037 /* ret = v; */
2038 zcopy(v, &ret);
2041 * shift-add modulus loop
2043 hisone = zisone(h);
2044 do {
2047 * split ret into to chunks, the lower n bits
2048 * and everything above the lower n bits
2050 /* if (highbit(ret) < n) { break; } */
2051 hbit = (long)zhighbit(ret);
2052 if (hbit < n) {
2053 zrelval = (zcmp(ret, modulus) ? -1 : 0);
2054 break;
2056 /* b = ret>>n; */
2057 zshift(ret, -n, &b);
2058 b.sign = ret.sign;
2059 /* a = ret - (b<<n); */
2060 a.sign = ret.sign;
2061 a.len = (n+BASEB-1)/BASEB;
2062 a.v = alloc(a.len);
2063 memcpy(a.v, ret.v, a.len*sizeof(HALF));
2064 if (n % BASEB) {
2065 a.v[a.len - 1] &= lowhalf[n % BASEB];
2067 ztrim(&a);
2070 * switch depending on r == -1, 0 or 1
2072 switch (r) {
2073 case -1: /* v mod h*2^h-1 */
2074 /* if (h == 1) ... */
2075 if (hisone) {
2076 /* ret = a + b; */
2077 zfree(ret);
2078 zadd(a, b, &ret);
2080 /* ... else ... */
2081 } else {
2082 /* quomod(b, h, tquo, tmod); */
2083 (void) zdiv(b, h, &tquo, &tmod, 0);
2084 /* ret = tmod<<n + a + tquo; */
2085 zshift(tmod, n, &t);
2086 zfree(tmod);
2087 zadd(a, tquo, &t2);
2088 zfree(tquo);
2089 zfree(ret);
2090 zadd(t, t2, &ret);
2091 zfree(t);
2092 zfree(t2);
2094 break;
2096 case 0: /* v mod h*2^h-1 */
2097 /* if (h == 1) ... */
2098 if (hisone) {
2099 /* ret = a; */
2100 zfree(ret);
2101 zcopy(a, &ret);
2103 /* ... else ... */
2104 } else {
2105 /* ret = (b%h)<<n + a; */
2106 (void) zmod(b, h, &tmod, 0);
2107 zshift(tmod, n, &t);
2108 zfree(tmod);
2109 zfree(ret);
2110 zadd(t, a, &ret);
2111 zfree(t);
2113 break;
2115 case 1: /* v mod h*2^h-1 */
2116 /* if (h == 1) ... */
2117 if (hisone) {
2118 /* ret = a-b; */
2119 zfree(ret);
2120 zsub(a, b, &ret);
2122 /* ... else ... */
2123 } else {
2124 /* quomod(b, h, tquo, tmod); */
2125 (void) zdiv(b, h, &tquo, &tmod, 0);
2126 /* tmod = tmod<<n + a; */
2127 zshift(tmod, n, &t);
2128 zfree(tmod);
2129 zadd(t, a, &tmod);
2130 zfree(t);
2131 /* ret = tmod-tquo; */
2132 zfree(ret);
2133 zsub(tmod, tquo, &ret);
2134 zfree(tquo);
2135 zfree(tmod);
2137 break;
2139 zfree(a);
2140 zfree(b);
2142 /* ... while (abs(ret) > modulus); */
2143 } while ((zrelval = zabsrel(ret, modulus)) > 0);
2144 /* ret = ((ret < 0) ? ret+modlus : ((ret == modulus) ? 0 : ret)); */
2145 if (ret.sign) {
2146 zadd(ret, modulus, &t);
2147 zfree(ret);
2148 ret = t;
2149 } else if (zrelval == 0) {
2150 zfree(ret);
2151 ret = _zero_;
2153 zfree(modulus);
2154 if (lbit > 0) {
2155 zfree(h);
2159 * return ret
2161 *res = ret;
2162 return;