c_cpp/etc/calc/zmod.c

   1 /*
   2  * zmod - modulo arithmetic routines
   3  *
   4  * Copyright (C) 1999-2007  David I. Bell, Landon Curt Noll and Ernest Bowen
   5  *
   6  * Primary author:  David I. Bell
   7  *
   8  * Calc is open software; you can redistribute it and/or modify it under
   9  * the terms of the version 2.1 of the GNU Lesser General Public License
  10  * as published by the Free Software Foundation.
  11  *
  12  * Calc is distributed in the hope that it will be useful, but WITHOUT
  13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
  15  * Public License for more details.
  16  *
  17  * A copy of version 2.1 of the GNU Lesser General Public License is
  18  * distributed with calc under the filename COPYING-LGPL.  You should have
  19  * received a copy with calc; if not, write to Free Software Foundation, Inc.
  20  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  21  *
  22  * @(#) $Revision: 30.1 $
  23  * @(#) $Id: zmod.c,v 30.1 2007/03/16 11:09:46 chongo Exp $
  24  * @(#) $Source: /usr/local/src/bin/calc/RCS/zmod.c,v $
  25  *
  26  * Under source code control:   1991/05/22 23:03:55
  27  * File existed as early as:    1991
  28  *
  29  * Share and enjoy!  :-)        http://www.isthe.com/chongo/tech/comp/calc/
  30  */
  31
  32 /*
  33  * Routines to do modulo arithmetic both normally and also using the REDC
  34  * algorithm given by Peter L. Montgomery in Mathematics of Computation,
  35  * volume 44, number 170 (April, 1985).  For multiple multiplies using
  36  * the same large modulus, the REDC algorithm avoids the usual division
  37  * by the modulus, instead replacing it with two multiplies or else a
  38  * special algorithm.  When these two multiplies or the special algorithm
  39  * are faster then the division, then the REDC algorithm is better.
  40  */
  41
  42
  43 #include "config.h"
  44 #include "zmath.h"
  45
  46
  47 #define POWBITS 4               /* bits for power chunks (must divide BASEB) */
  48 #define POWNUMS (1<<POWBITS)    /* number of powers needed in table */
  49
  50 S_FUNC void zmod5(ZVALUE *zp);
  51 S_FUNC void zmod6(ZVALUE z1, ZVALUE *res);
  52 S_FUNC void zredcmodinv(ZVALUE z1, ZVALUE *res);
  53
  54 STATIC REDC *powermodredc = NULL;       /* REDC info for raising to power */
  55
  56 BOOL havelastmod = FALSE;
  57 STATIC ZVALUE lastmod[1];
  58 STATIC ZVALUE lastmodinv[1];
  59
  60
  61 /*
  62  * Square a number and then mod the result with a second number.
  63  * The number to be squared can be negative or out of modulo range.
  64  * The result will be in the range 0 to the modulus - 1.
  65  *
  66  * given:
  67  *      z1              number to be squared
  68  *      z2              number to take mod with
  69  *      res             result
  70  */
  71 void
  72 zsquaremod(ZVALUE z1, ZVALUE z2, ZVALUE *res)
  73 {
  74         ZVALUE tmp;
  75         FULL prod;
  76         FULL digit;
  77
  78         if (ziszero(z2) || zisneg(z2))
  79                 math_error("Mod of non-positive integer");
  80                 /*NOTREACHED*/
  81         if (ziszero(z1) || zisunit(z2)) {
  82                 *res = _zero_;
  83                 return;
  84         }
  85
  86         /*
  87          * If the modulus is a single digit number, then do the result
  88          * cheaply.  Check especially for a small power of two.
  89          */
  90         if (zistiny(z2)) {
  91                 digit = z2.v[0];
  92                 if ((digit & -digit) == digit) {        /* NEEDS 2'S COMP */
  93                         prod = (FULL) z1.v[0];
  94                         prod = (prod * prod) & (digit - 1);
  95                 } else {
  96                         z1.sign = 0;
  97                         prod = (FULL) zmodi(z1, (long) digit);
  98                         prod = (prod * prod) % digit;
  99                 }
 100                 itoz((long) prod, res);
 101                 return;
 102         }
 103
 104         /*
 105          * The modulus is more than one digit.
 106          * Actually do the square and divide if necessary.
 107          */
 108         zsquare(z1, &tmp);
 109         if ((tmp.len < z2.len) ||
 110                 ((tmp.len == z2.len) && (tmp.v[tmp.len-1] < z2.v[z2.len-1]))) {
 111                         *res = tmp;
 112                         return;
 113         }
 114         zmod(tmp, z2, res, 0);
 115         zfree(tmp);
 116 }
 117
 118
 119 /*
 120  * Calculate the number congruent to the given number whose absolute
 121  * value is minimal.  The number to be reduced can be negative or out of
 122  * modulo range.  The result will be within the range -int((modulus-1)/2)
 123  * to int(modulus/2) inclusive.  For example, for modulus 7, numbers are
 124  * reduced to the range [-3, 3], and for modulus 8, numbers are reduced to
 125  * the range [-3, 4].
 126  *
 127  * given:
 128  *      z1              number to find minimum congruence of
 129  *      z2              number to take mod with
 130  *      res             result
 131  */
 132 void
 133 zminmod(ZVALUE z1, ZVALUE z2, ZVALUE *res)
 134 {
 135         ZVALUE tmp1, tmp2;
 136         int sign;
 137         int cv;
 138
 139         if (ziszero(z2) || zisneg(z2)) {
 140                 math_error("Mod of non-positive integer");
 141                 /*NOTREACHED*/
 142         }
 143         if (ziszero(z1) || zisunit(z2)) {
 144                 *res = _zero_;
 145                 return;
 146         }
 147         if (zistwo(z2)) {
 148                 if (zisodd(z1))
 149                         *res = _one_;
 150                 else
 151                         *res = _zero_;
 152                 return;
 153         }
 154
 155         /*
 156          * Do a quick check to see if the number is very small compared
 157          * to the modulus.  If so, then the result is obvious.
 158          */
 159         if (z1.len < z2.len - 1) {
 160                 zcopy(z1, res);
 161                 return;
 162         }
 163
 164         /*
 165          * Now make sure the input number is within the modulo range.
 166          * If not, then reduce it to be within range and make the
 167          * quick check again.
 168          */
 169         sign = z1.sign;
 170         z1.sign = 0;
 171         cv = zrel(z1, z2);
 172         if (cv == 0) {
 173                 *res = _zero_;
 174                 return;
 175         }
 176         tmp1 = z1;
 177         if (cv > 0) {
 178                 z1.sign = (BOOL)sign;
 179                 zmod(z1, z2, &tmp1, 0);
 180                 if (tmp1.len < z2.len - 1) {
 181                         *res = tmp1;
 182                         return;
 183                 }
 184                 sign = 0;
 185         }
 186
 187         /*
 188          * Now calculate the difference of the modulus and the absolute
 189          * value of the original number.  Compare the original number with
 190          * the difference, and return the one with the smallest absolute
 191          * value, with the correct sign.  If the two values are equal, then
 192          * return the positive result.
 193          */
 194         zsub(z2, tmp1, &tmp2);
 195         cv = zrel(tmp1, tmp2);
 196         if (cv < 0) {
 197                 zfree(tmp2);
 198                 tmp1.sign = (BOOL)sign;
 199                 if (tmp1.v == z1.v)
 200                         zcopy(tmp1, res);
 201                 else
 202                         *res = tmp1;
 203         } else {
 204                 if (cv)
 205                         tmp2.sign = !sign;
 206                 if (tmp1.v != z1.v)
 207                         zfree(tmp1);
 208                 *res = tmp2;
 209         }
 210 }
 211
 212
 213 /*
 214  * Compare two numbers for equality modulo a third number.
 215  * The two numbers to be compared can be negative or out of modulo range.
 216  * Returns TRUE if the numbers are not congruent, and FALSE if they are
 217  * congruent.
 218  *
 219  * given:
 220  *      z1              first number to be compared
 221  *      z2              second number to be compared
 222  *      z3              modulus
 223  */
 224 BOOL
 225 zcmpmod(ZVALUE z1, ZVALUE z2, ZVALUE z3)
 226 {
 227         ZVALUE tmp1, tmp2, tmp3;
 228         FULL digit;
 229         LEN len;
 230         int cv;
 231
 232         if (zisneg(z3) || ziszero(z3)) {
 233                 math_error("Non-positive modulus in zcmpmod");
 234                 /*NOTREACHED*/
 235         }
 236         if (zistwo(z3))
 237                 return (((z1.v[0] + z2.v[0]) & 0x1) != 0);
 238
 239         /*
 240          * If the two numbers are equal, then their mods are equal.
 241          */
 242         if ((z1.sign == z2.sign) && (z1.len == z2.len) &&
 243                 (z1.v[0] == z2.v[0]) && (zcmp(z1, z2) == 0))
 244                         return FALSE;
 245
 246         /*
 247          * If both numbers are negative, then we can make them positive.
 248          */
 249         if (zisneg(z1) && zisneg(z2)) {
 250                 z1.sign = 0;
 251                 z2.sign = 0;
 252         }
 253
 254         /*
 255          * For small negative numbers, make them positive before comparing.
 256          * In any case, the resulting numbers are in tmp1 and tmp2.
 257          */
 258         tmp1 = z1;
 259         tmp2 = z2;
 260         len = z3.len;
 261         digit = z3.v[len - 1];
 262
 263         if (zisneg(z1) && ((z1.len < len) ||
 264                 ((z1.len == len) && (z1.v[z1.len - 1] < digit))))
 265                         zadd(z1, z3, &tmp1);
 266
 267         if (zisneg(z2) && ((z2.len < len) ||
 268                 ((z2.len == len) && (z2.v[z2.len - 1] < digit))))
 269                         zadd(z2, z3, &tmp2);
 270
 271         /*
 272          * Now compare the two numbers for equality.
 273          * If they are equal we are all done.
 274          */
 275         if (zcmp(tmp1, tmp2) == 0) {
 276                 if (tmp1.v != z1.v)
 277                         zfree(tmp1);
 278                 if (tmp2.v != z2.v)
 279                         zfree(tmp2);
 280                 return FALSE;
 281         }
 282
 283         /*
 284          * They are not identical.  Now if both numbers are positive
 285          * and less than the modulus, then they are definitely not equal.
 286          */
 287         if ((tmp1.sign == tmp2.sign) &&
 288                 ((tmp1.len < len) || (zrel(tmp1, z3) < 0)) &&
 289                 ((tmp2.len < len) || (zrel(tmp2, z3) < 0))) {
 290                 if (tmp1.v != z1.v)
 291                         zfree(tmp1);
 292                 if (tmp2.v != z2.v)
 293                         zfree(tmp2);
 294                 return TRUE;
 295         }
 296
 297         /*
 298          * Either one of the numbers is negative or is large.
 299          * So do the standard thing and subtract the two numbers.
 300          * Then they are equal if the result is 0 (mod z3).
 301          */
 302         zsub(tmp1, tmp2, &tmp3);
 303         if (tmp1.v != z1.v)
 304                 zfree(tmp1);
 305         if (tmp2.v != z2.v)
 306                 zfree(tmp2);
 307
 308         /*
 309          * Compare the result with the modulus to see if it is equal to
 310          * or less than the modulus.  If so, we know the mod result.
 311          */
 312         tmp3.sign = 0;
 313         cv = zrel(tmp3, z3);
 314         if (cv == 0) {
 315                 zfree(tmp3);
 316                 return FALSE;
 317         }
 318         if (cv < 0) {
 319                 zfree(tmp3);
 320                 return TRUE;
 321         }
 322
 323         /*
 324          * We are forced to actually do the division.
 325          * The numbers are congruent if the result is zero.
 326          */
 327         zmod(tmp3, z3, &tmp1, 0);
 328         zfree(tmp3);
 329         if (ziszero(tmp1)) {
 330                 zfree(tmp1);
 331                 return FALSE;
 332         } else {
 333                 zfree(tmp1);
 334                 return TRUE;
 335         }
 336 }
 337
 338
 339 /*
 340  * Given the address of a positive integer whose word count does not
 341  * exceed twice that of the modulus stored at lastmod, to evaluate and store
 342  * at that address the value of the integer modulo the modulus.
 343  */
 344 S_FUNC void
 345 zmod5(ZVALUE *zp)
 346 {
 347         LEN len, modlen, j;
 348         ZVALUE tmp1, tmp2;
 349         ZVALUE z1, z2, z3;
 350         HALF *a, *b;
 351         FULL f;
 352         HALF u;
 353
 354         int subcount = 0;
 355
 356         if (zrel(*zp, *lastmod) < 0)
 357                 return;
 358         modlen = lastmod->len;
 359         len = zp->len;
 360         z1.v = zp->v + modlen - 1;
 361         z1.len = len - modlen + 1;
 362         z1.sign = z2.sign = z3.sign = 0;
 363         if (z1.len > modlen + 1) {
 364                 math_error("Bad call to zmod5!!!");
 365                 /*NOTREACHED*/
 366         }
 367         z2.v = lastmodinv->v + modlen + 1 - z1.len;
 368         z2.len = lastmodinv->len - modlen - 1 + z1.len;
 369         zmul(z1, z2, &tmp1);
 370         z3.v = tmp1.v + z1.len;
 371         z3.len = tmp1.len - z1.len;
 372         if (z3.len > 0) {
 373                 zmul(z3, *lastmod, &tmp2);
 374                 j = modlen;
 375                 a = zp->v;
 376                 b = tmp2.v;
 377                 u = 0;
 378                 len = modlen;
 379                 while (j-- > 0) {
 380                         f = (FULL) *a - (FULL) *b++ - (FULL) u;
 381                         *a++ = (HALF) f;
 382                         u = - (HALF) (f >> BASEB);
 383                 }
 384                 if (z1.len > 1) {
 385                         len++;
 386                         if (tmp2.len > modlen)
 387                                 f = (FULL) *a - (FULL) *b - (FULL) u;
 388                         else
 389                                 f = (FULL) *a - (FULL) u;
 390                         *a++ = (HALF) f;
 391                 }
 392                 while (len > 0 && *--a == 0)
 393                         len--;
 394                 zp->len = len;
 395                 zfree(tmp2);
 396         }
 397         zfree(tmp1);
 398         while (len > 0 && zrel(*zp, *lastmod) >= 0) {
 399                 subcount++;
 400                 if (subcount > 2) {
 401                         math_error("Too many subtractions in zmod5");
 402                         /*NOTREACHED*/
 403                 }
 404                 j = modlen;
 405                 a = zp->v;
 406                 b = lastmod->v;
 407                 u = 0;
 408                 while (j-- > 0) {
 409                         f = (FULL) *a - (FULL) *b++ - (FULL) u;
 410                         *a++ = (HALF) f;
 411                         u = - (HALF) (f >> BASEB);
 412                 }
 413                 if (len > modlen) {
 414                         f = (FULL) *a - (FULL) u;
 415                         *a++ = (HALF) f;
 416                 }
 417                 while (len > 0 && *--a == 0)
 418                         len--;
 419                 zp->len = len;
 420         }
 421         if (len == 0)
 422                 zp->len = 1;
 423 }
 424
 425 S_FUNC void
 426 zmod6(ZVALUE z1, ZVALUE *res)
 427 {
 428         LEN len, modlen, len0;
 429         int sign;
 430         ZVALUE zp0, ztmp;
 431
 432         if (ziszero(z1) || zisone(*lastmod)) {
 433                 *res = _zero_;
 434                 return;
 435         }
 436         sign = z1.sign;
 437         z1.sign = 0;
 438         zcopy(z1, &ztmp);
 439         modlen = lastmod->len;
 440         zp0.sign = 0;
 441         while (zrel(ztmp, *lastmod) >= 0) {
 442                 len = ztmp.len;
 443                 zp0.len = len;
 444                 len0 = 0;
 445                 if (len > 2 * modlen) {
 446                         zp0.len = 2 * modlen;
 447                         len0 = len - 2 * modlen;
 448                 }
 449                 zp0.v = ztmp.v + len - zp0.len;
 450                 zmod5(&zp0);
 451                 len = len0 + zp0.len;
 452                 while (len > 0 && ztmp.v[len - 1] == 0)
 453                         len--;
 454                 if (len == 0) {
 455                         zfree(ztmp);
 456                         *res = _zero_;
 457                         return;
 458                 }
 459                 ztmp.len = len;
 460         }
 461         if (sign)
 462                 zsub(*lastmod, ztmp, res);
 463         else
 464                 zcopy(ztmp, res);
 465         zfree(ztmp);
 466 }
 467
 468
 469
 470 /*
 471  * Compute the result of raising one number to a power modulo another number.
 472  * That is, this computes:  a^b (modulo c).
 473  * This calculates the result by examining the power POWBITS bits at a time,
 474  * using a small table of POWNUMS low powers to calculate powers for those bits,
 475  * and repeated squaring and multiplying by the partial powers to generate
 476  * the complete power.  If the power being raised to is high enough, then
 477  * this uses the REDC algorithm to avoid doing many divisions.  When using
 478  * REDC, multiple calls to this routine using the same modulus will be
 479  * slightly faster.
 480  */
 481 void
 482 zpowermod(ZVALUE z1, ZVALUE z2, ZVALUE z3, ZVALUE *res)
 483 {
 484         HALF *hp;               /* pointer to current word of the power */
 485         REDC *rp;               /* REDC information to be used */
 486         ZVALUE *pp;             /* pointer to low power table */
 487         ZVALUE ans, temp;       /* calculation values */
 488         ZVALUE modpow;          /* current small power */
 489         ZVALUE lowpowers[POWNUMS];      /* low powers */
 490         ZVALUE ztmp;
 491         int curshift;           /* shift value for word of power */
 492         HALF curhalf;           /* current word of power */
 493         unsigned int curpow;    /* current low power */
 494         unsigned int curbit;    /* current bit of low power */
 495         int i;
 496
 497         if (zisneg(z3) || ziszero(z3)) {
 498                 math_error("Non-positive modulus in zpowermod");
 499                 /*NOTREACHED*/
 500         }
 501         if (zisneg(z2)) {
 502                 math_error("Negative power in zpowermod");
 503                 /*NOTREACHED*/
 504         }
 505
 506
 507         /*
 508          * Check easy cases first.
 509          */
 510         if ((ziszero(z1) && !ziszero(z2)) || zisunit(z3)) {
 511                 /* 0^(non_zero) or x^y mod 1 always produces zero */
 512                 *res = _zero_;
 513                 return;
 514         }
 515         if (ziszero(z2)) {                      /* x^0 == 1 */
 516                 *res = _one_;
 517                 return;
 518         }
 519         if (zistwo(z3)) {                       /* mod 2 */
 520                 if (zisodd(z1))
 521                         *res = _one_;
 522                 else
 523                         *res = _zero_;
 524                 return;
 525         }
 526         if (zisunit(z1) && (!z1.sign || ziseven(z2))) {
 527                 /* 1^x or (-1)^(2x) */
 528                 *res = _one_;
 529                 return;
 530         }
 531
 532         /*
 533          * Normalize the number being raised to be non-negative and to lie
 534          * within the modulo range.  Then check for zero or one specially.
 535          */
 536         ztmp.len = 0;
 537         if (zisneg(z1) || zrel(z1, z3) >= 0) {
 538                 zmod(z1, z3, &ztmp, 0);
 539                 z1 = ztmp;
 540         }
 541         if (ziszero(z1)) {
 542                 if (ztmp.len)
 543                         zfree(ztmp);
 544                 *res = _zero_;
 545                 return;
 546         }
 547         if (zisone(z1)) {
 548                 if (ztmp.len)
 549                         zfree(ztmp);
 550                 *res = _one_;
 551                 return;
 552         }
 553
 554         /*
 555          * If modulus is large enough use zmod5
 556          */
 557         if (z3.len >= conf->pow2) {
 558                 if (havelastmod && zcmp(z3, *lastmod)) {
 559                         zfree(*lastmod);
 560                         zfree(*lastmodinv);
 561                         havelastmod = FALSE;
 562                 }
 563                 if (!havelastmod) {
 564                         zcopy(z3, lastmod);
 565                         zbitvalue(2 * z3.len * BASEB, &temp);
 566                         zquo(temp, z3, lastmodinv, 0);
 567                         zfree(temp);
 568                         havelastmod = TRUE;
 569                 }
 570
 571                 /* zzz */
 572                 for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
 573                         pp->len = 0;
 574                         pp->v = NULL;
 575                 }
 576                 lowpowers[0] = _one_;
 577                 lowpowers[1] = z1;
 578                 ans = _one_;
 579
 580                 hp = &z2.v[z2.len - 1];
 581                 curhalf = *hp;
 582                 curshift = BASEB - POWBITS;
 583                 while (curshift && ((curhalf >> curshift) == 0))
 584                         curshift -= POWBITS;
 585
 586                 /*
 587                  * Calculate the result by examining the power POWBITS bits at
 588                  * a time, and use the table of low powers at each iteration.
 589                  */
 590                 for (;;) {
 591                         curpow = (curhalf >> curshift) & (POWNUMS - 1);
 592                         pp = &lowpowers[curpow];
 593
 594                         /*
 595                          * If the small power is not yet saved in the table,
 596                          * then calculate it and remember it in the table for
 597                          * future use.
 598                          */
 599                         if (pp->v == NULL) {
 600                                 if (curpow & 0x1)
 601                                         zcopy(z1, &modpow);
 602                                 else
 603                                         modpow = _one_;
 604
 605                                 for (curbit = 0x2;
 606                                      curbit <= curpow;
 607                                      curbit *= 2) {
 608                                         pp = &lowpowers[curbit];
 609                                         if (pp->v == NULL) {
 610                                                 zsquare(lowpowers[curbit/2],
 611                                                         &temp);
 612                                                 zmod5(&temp);
 613                                                 zcopy(temp, pp);
 614                                                 zfree(temp);
 615                                         }
 616                                         if (curbit & curpow) {
 617                                                 zmul(*pp, modpow, &temp);
 618                                                 zfree(modpow);
 619                                                 zmod5(&temp);
 620                                                 zcopy(temp, &modpow);
 621                                                 zfree(temp);
 622                                         }
 623                                 }
 624                                 pp = &lowpowers[curpow];
 625                                 if (pp->v != NULL) {
 626                                         zfree(*pp);
 627                                 }
 628                                 *pp = modpow;
 629                         }
 630
 631                         /*
 632                          * If the power is nonzero, then accumulate the small
 633                          * power into the result.
 634                          */
 635                         if (curpow) {
 636                                 zmul(ans, *pp, &temp);
 637                                 zfree(ans);
 638                                 zmod5(&temp);
 639                                 zcopy(temp, &ans);
 640                                 zfree(temp);
 641                         }
 642
 643                         /*
 644                          * Select the next POWBITS bits of the power, if
 645                          * there is any more to generate.
 646                          */
 647                         curshift -= POWBITS;
 648                         if (curshift < 0) {
 649                                 if (hp == z2.v)
 650                                         break;
 651                                 curhalf = *--hp;
 652                                 curshift = BASEB - POWBITS;
 653                         }
 654
 655                         /*
 656                          * Square the result POWBITS times to make room for
 657                          * the next chunk of bits.
 658                          */
 659                         for (i = 0; i < POWBITS; i++) {
 660                                 zsquare(ans, &temp);
 661                                 zfree(ans);
 662                                 zmod5(&temp);
 663                                 zcopy(temp, &ans);
 664                                 zfree(temp);
 665                         }
 666                 }
 667
 668                 for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
 669                         if (pp->v != NULL)
 670                                 freeh(pp->v);
 671                 }
 672                 *res = ans;
 673                 if (ztmp.len)
 674                         zfree(ztmp);
 675                 return;
 676         }
 677
 678         /*
 679          * If the modulus is odd and small enough then use
 680          * the REDC algorithm.  The size where this is done is configurable.
 681          */
 682         if (z3.len < conf->redc2 && zisodd(z3)) {
 683                 if (powermodredc && zcmp(powermodredc->mod, z3)) {
 684                         zredcfree(powermodredc);
 685                         powermodredc = NULL;
 686                 }
 687                 if (powermodredc == NULL)
 688                         powermodredc = zredcalloc(z3);
 689                 rp = powermodredc;
 690                 zredcencode(rp, z1, &temp);
 691                 zredcpower(rp, temp, z2, &z1);
 692                 zfree(temp);
 693                 zredcdecode(rp, z1, res);
 694                 zfree(z1);
 695                 return;
 696         }
 697
 698         /*
 699          * Modulus or power is small enough to perform the power raising
 700          * directly.  Initialize the table of powers.
 701          */
 702         for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
 703                 pp->len = 0;
 704                 pp->v = NULL;
 705         }
 706         lowpowers[0] = _one_;
 707         lowpowers[1] = z1;
 708         ans = _one_;
 709
 710         hp = &z2.v[z2.len - 1];
 711         curhalf = *hp;
 712         curshift = BASEB - POWBITS;
 713         while (curshift && ((curhalf >> curshift) == 0))
 714                 curshift -= POWBITS;
 715
 716         /*
 717          * Calculate the result by examining the power POWBITS bits at a time,
 718          * and use the table of low powers at each iteration.
 719          */
 720         for (;;) {
 721                 curpow = (curhalf >> curshift) & (POWNUMS - 1);
 722                 pp = &lowpowers[curpow];
 723
 724                 /*
 725                  * If the small power is not yet saved in the table, then
 726                  * calculate it and remember it in the table for future use.
 727                  */
 728                 if (pp->v == NULL) {
 729                         if (curpow & 0x1)
 730                                 zcopy(z1, &modpow);
 731                         else
 732                                 modpow = _one_;
 733
 734                         for (curbit = 0x2; curbit <= curpow; curbit *= 2) {
 735                                 pp = &lowpowers[curbit];
 736                                 if (pp->v == NULL) {
 737                                         zsquare(lowpowers[curbit/2], &temp);
 738                                         zmod(temp, z3, pp, 0);
 739                                         zfree(temp);
 740                                 }
 741                                 if (curbit & curpow) {
 742                                         zmul(*pp, modpow, &temp);
 743                                         zfree(modpow);
 744                                         zmod(temp, z3, &modpow, 0);
 745                                         zfree(temp);
 746                                 }
 747                         }
 748                         pp = &lowpowers[curpow];
 749                         if (pp->v != NULL) {
 750                                 zfree(*pp);
 751                         }
 752                         *pp = modpow;
 753                 }
 754
 755                 /*
 756                  * If the power is nonzero, then accumulate the small power
 757                  * into the result.
 758                  */
 759                 if (curpow) {
 760                         zmul(ans, *pp, &temp);
 761                         zfree(ans);
 762                         zmod(temp, z3, &ans, 0);
 763                         zfree(temp);
 764                 }
 765
 766                 /*
 767                  * Select the next POWBITS bits of the power, if there is
 768                  * any more to generate.
 769                  */
 770                 curshift -= POWBITS;
 771                 if (curshift < 0) {
 772                         if (hp-- == z2.v)
 773                                 break;
 774                         curhalf = *hp;
 775                         curshift = BASEB - POWBITS;
 776                 }
 777
 778                 /*
 779                  * Square the result POWBITS times to make room for the next
 780                  * chunk of bits.
 781                  */
 782                 for (i = 0; i < POWBITS; i++) {
 783                         zsquare(ans, &temp);
 784                         zfree(ans);
 785                         zmod(temp, z3, &ans, 0);
 786                         zfree(temp);
 787                 }
 788         }
 789
 790         for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
 791                 if (pp->v != NULL)
 792                         freeh(pp->v);
 793         }
 794         *res = ans;
 795         if (ztmp.len)
 796                 zfree(ztmp);
 797 }
 798
 799 /*
 800  * Given a positive odd N-word integer z, evaluate minv(-z, BASEB^N)
 801  */
 802 S_FUNC void
 803 zredcmodinv(ZVALUE z, ZVALUE *res)
 804 {
 805         ZVALUE tmp;
 806         HALF *a0, *a, *b;
 807         HALF bit, h, inv, v;
 808         FULL f;
 809         LEN N, i, j, len;
 810
 811         N = z.len;
 812         tmp.sign = 0;
 813         tmp.len = N;
 814         tmp.v = alloc(N);
 815         zclearval(tmp);
 816         *tmp.v = 1;
 817         h = 1 + *z.v;
 818         bit = 1;
 819         inv = 1;
 820         while (h) {
 821                 bit <<= 1;
 822                 if (bit & h) {
 823                         inv |= bit;
 824                         h += bit * *z.v;
 825                 }
 826         }
 827
 828         j = N;
 829         a0 = tmp.v;
 830         while (j-- > 0) {
 831                 v = inv * *a0;
 832                 i = j;
 833                 a = a0;
 834                 b = z.v;
 835                 f = (FULL) v * (FULL) *b++ + (FULL) *a++;
 836                 *a0 = v;
 837                 while (i-- > 0) {
 838                         f = (FULL) v * (FULL) *b++  + (FULL) *a + (f >> BASEB);
 839                         *a++ = (HALF) f;
 840                 }
 841                 while (j > 0 && *++a0 == 0)
 842                         j--;
 843         }
 844         a = tmp.v + N;
 845         len = N;
 846         while (*--a == 0)
 847                 len--;
 848         tmp.len = len;
 849         zcopy(tmp, res);
 850         zfree(tmp);
 851 }
 852
 853
 854 /*
 855  * Initialize the REDC algorithm for a particular modulus,
 856  * returning a pointer to a structure that is used for other
 857  * REDC calls.  An error is generated if the structure cannot
 858  * be allocated.  The modulus must be odd and positive.
 859  *
 860  * given:
 861  *      z1              modulus to initialize for
 862  */
 863 REDC *
 864 zredcalloc(ZVALUE z1)
 865 {
 866         REDC *rp;               /* REDC information */
 867         ZVALUE tmp;
 868         long bit;
 869
 870         if (ziseven(z1) || zisneg(z1)) {
 871                 math_error("REDC requires positive odd modulus");
 872                 /*NOTREACHED*/
 873         }
 874
 875         rp = (REDC *) malloc(sizeof(REDC));
 876         if (rp == NULL) {
 877                 math_error("Cannot allocate REDC structure");
 878                 /*NOTREACHED*/
 879         }
 880
 881         /*
 882          * Round up the binary modulus to the next power of two
 883          * which is at a word boundary.  Then the shift and modulo
 884          * operations mod the binary modulus can be done very cheaply.
 885          * Calculate the REDC format for the number 1 for future use.
 886          */
 887         zcopy(z1, &rp->mod);
 888         zredcmodinv(z1, &rp->inv);
 889         bit = zhighbit(z1) + 1;
 890         if (bit % BASEB)
 891                 bit += (BASEB - (bit % BASEB));
 892         zbitvalue(bit, &tmp);
 893         zmod(tmp, rp->mod, &rp->one, 0);
 894         zfree(tmp);
 895         rp->len = (LEN)(bit / BASEB);
 896         return rp;
 897 }
 898
 899
 900 /*
 901  * Free any numbers associated with the specified REDC structure,
 902  * and then the REDC structure itself.
 903  *
 904  * given:
 905  *      rp              REDC information to be cleared
 906  */
 907 void
 908 zredcfree(REDC *rp)
 909 {
 910         zfree(rp->mod);
 911         zfree(rp->inv);
 912         zfree(rp->one);
 913         free(rp);
 914 }
 915
 916
 917 /*
 918  * Convert a normal number into the specified REDC format.
 919  * The number to be converted can be negative or out of modulo range.
 920  * The resulting number can be used for multiplying, adding, subtracting,
 921  * or comparing with any other such converted numbers, as if the numbers
 922  * were being calculated modulo the number which initialized the REDC
 923  * information.  When the final value is unconverted, the result is the
 924  * same as if the usual operations were done with the original numbers.
 925  *
 926  * given:
 927  *      rp              REDC information
 928  *      z1              number to be converted
 929  *      res             returned converted number
 930  */
 931 void
 932 zredcencode(REDC *rp, ZVALUE z1, ZVALUE *res)
 933 {
 934         ZVALUE tmp1;
 935
 936         /*
 937          * Confirm or initialize lastmod information when modulus is a
 938          * big number.
 939          */
 940
 941         if (rp->len >= conf->pow2) {
 942                 if (havelastmod && zcmp(rp->mod, *lastmod)) {
 943                         zfree(*lastmod);
 944                         zfree(*lastmodinv);
 945                         havelastmod = FALSE;
 946                 }
 947                 if (!havelastmod) {
 948                         zcopy(rp->mod, lastmod);
 949                         zbitvalue(2 * rp->len * BASEB, &tmp1);
 950                         zquo(tmp1, rp->mod, lastmodinv, 0);
 951                         zfree(tmp1);
 952                         havelastmod = TRUE;
 953                 }
 954         }
 955         /*
 956          * Handle the cases 0, 1, -1, and 2 specially since these are
 957          * easy to calculate.  Zero transforms to zero, and the others
 958          * can be obtained from the precomputed REDC format for 1 since
 959          * addition and subtraction act normally for REDC format numbers.
 960          */
 961         if (ziszero(z1)) {
 962                 *res = _zero_;
 963                 return;
 964         }
 965         if (zisone(z1)) {
 966                 zcopy(rp->one, res);
 967                 return;
 968         }
 969         if (zisunit(z1)) {
 970                 zsub(rp->mod, rp->one, res);
 971                 return;
 972         }
 973         if (zistwo(z1)) {
 974                 zadd(rp->one, rp->one, &tmp1);
 975                 if (zrel(tmp1, rp->mod) < 0) {
 976                         *res = tmp1;
 977                         return;
 978                 }
 979                 zsub(tmp1, rp->mod, res);
 980                 zfree(tmp1);
 981                 return;
 982         }
 983
 984         /*
 985          * Not a trivial number to convert, so do the full transformation.
 986          */
 987         zshift(z1, rp->len * BASEB, &tmp1);
 988         if (rp->len < conf->pow2)
 989                 zmod(tmp1, rp->mod, res, 0);
 990         else
 991                 zmod6(tmp1, res);
 992         zfree(tmp1);
 993 }
 994
 995
 996 /*
 997  * The REDC algorithm used to convert numbers out of REDC format and also
 998  * used after multiplication of two REDC numbers.  Using this routine
 999  * avoids any divides, replacing the divide by two multiplications.
1000  * If the numbers are very large, then these two multiplies will be
1001  * quicker than the divide, since dividing is harder than multiplying.
1002  *
1003  * given:
1004  *      rp              REDC information
1005  *      z1              number to be transformed
1006  *      res             returned transformed number
1007  */
1008 void
1009 zredcdecode(REDC *rp, ZVALUE z1, ZVALUE *res)
1010 {
1011         ZVALUE tmp1, tmp2;
1012         ZVALUE ztmp;
1013         ZVALUE ztop;
1014         ZVALUE zp1;
1015         FULL muln;
1016         HALF *h1;
1017         HALF *h3;
1018         HALF *hd = NULL;
1019         HALF Ninv;
1020         LEN modlen;
1021         LEN len;
1022         FULL f;
1023         int sign;
1024         int i, j;
1025
1026         /*
1027          * Check first for the special values for 0 and 1 that are easy.
1028          */
1029         if (ziszero(z1)) {
1030                 *res = _zero_;
1031                 return;
1032         }
1033         if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
1034                 (zcmp(z1, rp->one) == 0)) {
1035                         *res = _one_;
1036                         return;
1037         }
1038         ztop.len = 0;
1039         ztmp.len = 0;
1040         modlen = rp->len;
1041         sign = z1.sign;
1042         z1.sign = 0;
1043         if (z1.len > modlen) {
1044                 ztop.v = z1.v + modlen;
1045                 ztop.len = z1.len - modlen;
1046                 ztop.sign = 0;
1047                 if (zrel(ztop, rp->mod) >= 0) {
1048                         zmod(ztop, rp->mod, &ztmp, 0);
1049                         ztop = ztmp;
1050                 }
1051                 len = modlen;
1052                 h1 = z1.v + len;
1053                 while (len > 0 && *--h1 == 0)
1054                         len--;
1055                 if (len == 0) {
1056                         if (ztmp.len)
1057                                 *res = ztmp;
1058                         else
1059                                 zcopy(ztop, res);
1060                         return;
1061                 }
1062                 z1.len = len;
1063
1064         }
1065         if (rp->mod.len < conf->pow2) {
1066                 Ninv = rp->inv.v[0];
1067                 res->sign = 0;
1068                 res->len = modlen;
1069                 res->v = alloc(modlen);
1070                 zclearval(*res);
1071                 h1 = z1.v;
1072                 for (i = 0; i < modlen; i++) {
1073                         h3 = rp->mod.v;
1074                         hd = res->v;
1075                         f = (FULL) *hd++;
1076                         if (i < z1.len)
1077                                 f += (FULL) *h1++;
1078                         muln = (HALF) ((f & BASE1) * Ninv);
1079                         f = ((muln * (FULL) *h3++) + f) >> BASEB;
1080                         j = modlen;
1081                         while (--j > 0) {
1082                                 f += (muln * (FULL) *h3++) + (FULL) *hd;
1083                                 hd[-1] = (HALF) f;
1084                                 f >>= BASEB;
1085                                 hd++;
1086                         }
1087                         hd[-1] = (HALF) f;
1088                 }
1089                 len = modlen;
1090                 while (*--hd == 0 && len > 1)
1091                         len--;
1092                 if (len == 0)
1093                         len = 1;
1094                 res->len = len;
1095         } else {
1096                 /* Here 0 < z1 < 2^bitnum */
1097
1098                 /*
1099                  * First calculate the following:
1100                  *      tmp2 = ((z1 * inv) % 2^bitnum.
1101                  * The mod operations can be done with no work since the bit
1102                  * number was selected as a multiple of the word size.  Just
1103                  * reduce the sizes of the numbers as required.
1104                  */
1105                 zmul(z1, rp->inv, &tmp2);
1106                 if (tmp2.len > modlen) {
1107                         h1 = tmp2.v + modlen;
1108                         len = modlen;
1109                         while (len > 0 && *--h1 == 0)
1110                                 len--;
1111                         tmp2.len = len;
1112                 }
1113
1114                 /*
1115                  * Next calculate the following:
1116                  *      res = (z1 + tmp2 * modulus) / 2^bitnum
1117                  * Since 0 < z1 < 2^bitnum and the division is always exact,
1118                  * the quotient can be evaluated by rounding up
1119                  * (tmp2 * modulus)/2^bitnum.  This can be achieved by defining
1120                  * zp1 by an appropriate shift and then adding one.
1121                  */
1122                 zmul(tmp2, rp->mod, &tmp1);
1123                 zfree(tmp2);
1124                 if (tmp1.len > modlen) {
1125                         zp1.v = tmp1.v + modlen;
1126                         zp1.len = tmp1.len - modlen;
1127                         zp1.sign = 0;
1128                         zadd(zp1, _one_, res);
1129                 } else {
1130                         *res = _one_;
1131                 }
1132                 zfree(tmp1);
1133         }
1134         if (ztop.len) {
1135                 zadd(*res, ztop, &tmp1);
1136                 zfree(*res);
1137                 if (ztmp.len)
1138                         zfree(ztmp);
1139                 *res = tmp1;
1140         }
1141
1142         /*
1143          * Finally do a final modulo by a simple subtraction if necessary.
1144          * This is all that is needed because the previous calculation is
1145          * guaranteed to always be less than twice the modulus.
1146          */
1147
1148         if (zrel(*res, rp->mod) >= 0) {
1149                 zsub(*res, rp->mod, &tmp1);
1150                 zfree(*res);
1151                 *res = tmp1;
1152         }
1153         if (sign && !ziszero(*res)) {
1154                 zsub(rp->mod, *res, &tmp1);
1155                 zfree(*res);
1156                 *res = tmp1;
1157         }
1158         return;
1159 }
1160
1161
1162 /*
1163  * Multiply two numbers in REDC format together producing a result also
1164  * in REDC format.  If the result is converted back to a normal number,
1165  * then the result is the same as the modulo'd multiplication of the
1166  * original numbers before they were converted to REDC format.  This
1167  * calculation is done in one of two ways, depending on the size of the
1168  * modulus.  For large numbers, the REDC definition is used directly
1169  * which involves three multiplies overall.  For small numbers, a
1170  * complicated routine is used which does the indicated multiplication
1171  * and the REDC algorithm at the same time to produce the result.
1172  *
1173  * given:
1174  *      rp              REDC information
1175  *      z1              first REDC number to be multiplied
1176  *      z2              second REDC number to be multiplied
1177  *      res             resulting REDC number
1178  */
1179 void
1180 zredcmul(REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res)
1181 {
1182         FULL mulb;
1183         FULL muln;
1184         HALF *h1;
1185         HALF *h2;
1186         HALF *h3;
1187         HALF *hd;
1188         HALF Ninv;
1189         HALF topdigit = 0;
1190         LEN modlen;
1191         LEN len;
1192         LEN len2;
1193         SIUNION sival1;
1194         SIUNION sival2;
1195         SIUNION carry;
1196         ZVALUE tmp;
1197         ZVALUE z1tmp, z2tmp;
1198         int sign;
1199
1200         sign = z1.sign ^ z2.sign;
1201         z1.sign = 0;
1202         z2.sign = 0;
1203         z1tmp.len = 0;
1204         if (zrel(z1, rp->mod) >= 0) {
1205                 zmod(z1, rp->mod, &z1tmp, 0);
1206                 z1 = z1tmp;
1207         }
1208         z2tmp.len = 0;
1209         if (zrel(z2, rp->mod) >= 0) {
1210                 zmod(z2, rp->mod, &z2tmp, 0);
1211                 z2 = z2tmp;
1212         }
1213
1214
1215         /*
1216          * Check for special values which we easily know the answer.
1217          */
1218         if (ziszero(z1) || ziszero(z2)) {
1219                 *res = _zero_;
1220                 if (z1tmp.len)
1221                         zfree(z1tmp);
1222                 if (z2tmp.len)
1223                         zfree(z2tmp);
1224                 return;
1225         }
1226
1227         if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
1228                 (zcmp(z1, rp->one) == 0)) {
1229                         if (sign)
1230                                 zsub(rp->mod, z2, res);
1231                         else
1232                                 zcopy(z2, res);
1233                         if (z1tmp.len)
1234                                 zfree(z1tmp);
1235                         if (z2tmp.len)
1236                                 zfree(z2tmp);
1237                         return;
1238         }
1239
1240         if ((z2.len == rp->one.len) && (z2.v[0] == rp->one.v[0]) &&
1241                 (zcmp(z2, rp->one) == 0)) {
1242                         if (sign)
1243                                 zsub(rp->mod, z1, res);
1244                         else
1245                                 zcopy(z1, res);
1246                         if (z1tmp.len)
1247                                 zfree(z1tmp);
1248                         if (z2tmp.len)
1249                                 zfree(z2tmp);
1250                         return;
1251         }
1252
1253         /*
1254          * If the size of the modulus is large, then just do the multiply,
1255          * followed by the two multiplies contained in the REDC routine.
1256          * This will be quicker than directly doing the REDC calculation
1257          * because of the O(N^1.585) speed of the multiplies.  The size
1258          * of the number which this is done is configurable.
1259          */
1260         if (rp->mod.len >= conf->redc2) {
1261                 zmul(z1, z2, &tmp);
1262                 zredcdecode(rp, tmp, res);
1263                 zfree(tmp);
1264                 if (sign && !ziszero(*res)) {
1265                         zsub(rp->mod, *res, &tmp);
1266                         zfree(*res);
1267                         *res = tmp;
1268                 }
1269                 if (z1tmp.len)
1270                         zfree(z1tmp);
1271                 if (z2tmp.len)
1272                         zfree(z2tmp);
1273                 return;
1274         }
1275
1276         /*
1277          * The number is small enough to calculate by doing the O(N^2) REDC
1278          * algorithm directly.  This algorithm performs the multiplication and
1279          * the reduction at the same time.  Notice the obscure facts that
1280          * only the lowest word of the inverse value is used, and that
1281          * there is no shifting of the partial products as there is in a
1282          * normal multiply.
1283          */
1284         modlen = rp->mod.len;
1285         Ninv = rp->inv.v[0];
1286
1287         /*
1288          * Allocate the result and clear it.
1289          * The size of the result will be equal to or smaller than
1290          * the modulus size.
1291          */
1292         res->sign = 0;
1293         res->len = modlen;
1294         res->v = alloc(modlen);
1295
1296         hd = res->v;
1297         len = modlen;
1298         zclearval(*res);
1299
1300         /*
1301          * Do this outermost loop over all the digits of z1.
1302          */
1303         h1 = z1.v;
1304         len = z1.len;
1305         while (len--) {
1306                 /*
1307                  * Start off with the next digit of z1, the first
1308                  * digit of z2, and the first digit of the modulus.
1309                  */
1310                 mulb = (FULL) *h1++;
1311                 h2 = z2.v;
1312                 h3 = rp->mod.v;
1313                 hd = res->v;
1314                 sival1.ivalue = mulb * ((FULL) *h2++) + ((FULL) *hd++);
1315                 muln = ((HALF) (sival1.silow * Ninv));
1316                 sival2.ivalue = muln * ((FULL) *h3++) + ((FULL) sival1.silow);
1317                 carry.ivalue = ((FULL) sival1.sihigh) + ((FULL) sival2.sihigh);
1318
1319                 /*
1320                  * Do this innermost loop for each digit of z2, except
1321                  * for the first digit which was just done above.
1322                  */
1323                 len2 = z2.len;
1324                 while (--len2 > 0) {
1325                         sival1.ivalue = mulb * ((FULL) *h2++)
1326                                 + ((FULL) *hd) + ((FULL) carry.silow);
1327                         sival2.ivalue = muln * ((FULL) *h3++)
1328                                 + ((FULL) sival1.silow);
1329                         carry.ivalue = ((FULL) sival1.sihigh)
1330                                 + ((FULL) sival2.sihigh)
1331                                 + ((FULL) carry.sihigh);
1332
1333                         hd[-1] = sival2.silow;
1334                         hd++;
1335                 }
1336
1337                 /*
1338                  * Now continue the loop as necessary so the total number
1339                  * of iterations is equal to the size of the modulus.
1340                  * This acts as if the innermost loop was repeated for
1341                  * high digits of z2 that are zero.
1342                  */
1343                 len2 = modlen - z2.len;
1344                 while (len2--) {
1345                         sival2.ivalue = muln * ((FULL) *h3++)
1346                                 + ((FULL) *hd)
1347                                 + ((FULL) carry.silow);
1348                         carry.ivalue = ((FULL) sival2.sihigh)
1349                                 + ((FULL) carry.sihigh);
1350
1351                         hd[-1] = sival2.silow;
1352                         hd++;
1353                 }
1354
1355                 carry.ivalue += topdigit;
1356                 hd[-1] = carry.silow;
1357                 topdigit = carry.sihigh;
1358         }
1359
1360         /*
1361          * Now continue the loop as necessary so the total number
1362          * of iterations is equal to the size of the modulus.
1363          * This acts as if the outermost loop was repeated for high
1364          * digits of z1 that are zero.
1365          */
1366         len = modlen - z1.len;
1367         while (len--) {
1368                 /*
1369                  * Start off with the first digit of the modulus.
1370                  */
1371                 h3 = rp->mod.v;
1372                 hd = res->v;
1373                 muln = ((HALF) (*hd * Ninv));
1374                 sival2.ivalue = muln * ((FULL) *h3++) + (FULL) *hd++;
1375                 carry.ivalue = ((FULL) sival2.sihigh);
1376
1377                 /*
1378                  * Do this innermost loop for each digit of the modulus,
1379                  * except for the first digit which was just done above.
1380                  */
1381                 len2 = modlen;
1382                 while (--len2 > 0) {
1383                         sival2.ivalue = muln * ((FULL) *h3++)
1384                                 + ((FULL) *hd) + ((FULL) carry.silow);
1385                         carry.ivalue = ((FULL) sival2.sihigh)
1386                                 + ((FULL) carry.sihigh);
1387
1388                         hd[-1] = sival2.silow;
1389                         hd++;
1390                 }
1391                 carry.ivalue += topdigit;
1392                 hd[-1] = carry.silow;
1393                 topdigit = carry.sihigh;
1394         }
1395
1396         /*
1397          * Determine the true size of the result, taking the top digit of
1398          * the current result into account.  The top digit is not stored in
1399          * the number because it is temporary and would become zero anyway
1400          * after the final subtraction is done.
1401          */
1402         if (topdigit == 0) {
1403                 len = modlen;
1404                 while (*--hd == 0 && len > 1) {
1405                         len--;
1406                 }
1407                 res->len = len;
1408
1409         /*
1410          * Compare the result with the modulus.
1411          * If it is less than the modulus, then the calculation is complete.
1412          */
1413
1414                 if (zrel(*res, rp->mod) < 0) {
1415                         if (z1tmp.len)
1416                                 zfree(z1tmp);
1417                         if (z2tmp.len)
1418                                 zfree(z2tmp);
1419                         if (sign && !ziszero(*res)) {
1420                                 zsub(rp->mod, *res, &tmp);
1421                                 zfree(*res);
1422                                 *res = tmp;
1423                         }
1424                         return;
1425                 }
1426         }
1427
1428         /*
1429          * Do a subtraction to reduce the result to a value less than
1430          * the modulus.  The REDC algorithm guarantees that a single subtract
1431          * is all that is needed.  Ignore any borrowing from the possible
1432          * highest word of the current result because that would affect
1433          * only the top digit value that was not stored and would become
1434          * zero anyway.
1435          */
1436         carry.ivalue = 0;
1437         h1 = rp->mod.v;
1438         hd = res->v;
1439         len = modlen;
1440         while (len--) {
1441                 carry.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++)
1442                         + ((FULL) carry.silow);
1443                 *hd++ = (HALF)(BASE1 - carry.silow);
1444                 carry.silow = carry.sihigh;
1445         }
1446
1447         /*
1448          * Now finally recompute the size of the result.
1449          */
1450         len = modlen;
1451         hd = &res->v[len - 1];
1452         while ((*hd == 0) && (len > 1)) {
1453                 hd--;
1454                 len--;
1455         }
1456         res->len = len;
1457         if (z1tmp.len)
1458                 zfree(z1tmp);
1459         if (z2tmp.len)
1460                 zfree(z2tmp);
1461         if (sign && !ziszero(*res)) {
1462                 zsub(rp->mod, *res, &tmp);
1463                 zfree(*res);
1464                 *res = tmp;
1465         }
1466
1467 }
1468
1469 /*
1470  * Square a number in REDC format producing a result also in REDC format.
1471  *
1472  * given:
1473  *      rp              REDC information
1474  *      z1              REDC number to be squared
1475  *      res             resulting REDC number
1476  */
1477 void
1478 zredcsquare(REDC *rp, ZVALUE z1, ZVALUE *res)
1479 {
1480         FULL mulb;
1481         FULL muln;
1482         HALF *h1;
1483         HALF *h2;
1484         HALF *h3;
1485         HALF *hd = NULL;
1486         HALF Ninv;
1487         HALF topdigit = 0;
1488         LEN modlen;
1489         LEN len;
1490         SIUNION sival1;
1491         SIUNION sival2;
1492         SIUNION sival3;
1493         SIUNION carry;
1494         ZVALUE tmp, ztmp;
1495         FULL f;
1496         int i, j;
1497
1498         ztmp.len = 0;
1499         z1.sign = 0;
1500         if (zrel(z1, rp->mod) >= 0) {
1501                 zmod(z1, rp->mod, &ztmp, 0);
1502                 z1 = ztmp;
1503         }
1504         if (ziszero(z1)) {
1505                 *res = _zero_;
1506                 if (ztmp.len)
1507                         zfree(ztmp);
1508                 return;
1509         }
1510         if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
1511                 (zcmp(z1, rp->one) == 0)) {
1512                         zcopy(z1, res);
1513                         if (ztmp.len)
1514                                 zfree(ztmp);
1515                         return;
1516         }
1517
1518
1519         /*
1520          * If the modulus is small enough, then call the multiply
1521          * routine to produce the result.  Otherwise call the O(N^1.585)
1522          * routines to get the answer.
1523          */
1524         if (rp->mod.len >= conf->redc2
1525                         || 3 * z1.len < 2 * rp->mod.len) {
1526                 zsquare(z1, &tmp);
1527                 zredcdecode(rp, tmp, res);
1528                 zfree(tmp);
1529                 if (ztmp.len)
1530                         zfree(ztmp);
1531                 return;
1532         }
1533         modlen = rp->mod.len;
1534         Ninv = rp->inv.v[0];
1535
1536         res->sign = 0;
1537         res->len = modlen;
1538         res->v = alloc(modlen);
1539
1540         zclearval(*res);
1541
1542         h1 = z1.v;
1543
1544         for (i = 0; i < z1.len; i++) {
1545                 mulb = (FULL) *h1++;
1546                 h2 = h1;
1547                 h3 = rp->mod.v;
1548                 hd = res->v;
1549                 if (i == 0) {
1550                         sival1.ivalue = mulb * mulb;
1551                         muln = (HALF) (sival1.silow * Ninv);
1552                         sival2.ivalue = muln * ((FULL) *h3++)
1553                                 + (FULL) sival1.silow;
1554                         carry.ivalue = (FULL) sival1.sihigh
1555                                 + (FULL) sival2.sihigh;
1556                         hd++;
1557                 } else {
1558                         muln = (HALF) (*hd * Ninv);
1559                         f = (muln * ((FULL) *h3++) + (FULL) *hd++) >> BASEB;
1560                         j = i;
1561                         while (--j > 0) {
1562                                 f += muln * ((FULL) *h3++) + *hd;
1563                                 hd[-1] = (HALF) f;
1564                                 f >>= BASEB;
1565                                 hd++;
1566                         }
1567                         carry.ivalue = f;
1568                         sival1.ivalue = mulb * mulb + (FULL) carry.silow;
1569                         sival2.ivalue = muln * ((FULL) *h3++)
1570                                 + (FULL) *hd
1571                                 + (FULL) sival1.silow;
1572                         carry.ivalue = (FULL) sival1.sihigh
1573                                 + (FULL) sival2.sihigh
1574                                 + (FULL) carry.sihigh;
1575                         hd[-1] = sival2.silow;
1576                         hd++;
1577                 }
1578                 j = z1.len - i;
1579                 while (--j > 0) {
1580                         sival1.ivalue = mulb * ((FULL) *h2++);
1581                         sival2.ivalue = ((FULL) sival1.silow << 1)
1582                                 + muln * ((FULL) *h3++);
1583                         sival3.ivalue = (FULL) sival2.silow
1584                                 + (FULL) *hd
1585                                 + (FULL) carry.silow;
1586                         carry.ivalue = ((FULL) sival1.sihigh << 1)
1587                                 + (FULL) sival2.sihigh
1588                                 + (FULL) sival3.sihigh
1589                                 + (FULL) carry.sihigh;
1590                         hd[-1] = sival3.silow;
1591                         hd++;
1592                 }
1593                 j = modlen - z1.len;
1594                 while (j-- > 0) {
1595                         sival1.ivalue = muln * ((FULL) *h3++)
1596                                 + (FULL) *hd
1597                                 + (FULL) carry.silow;
1598                         carry.ivalue = (FULL) sival1.sihigh
1599                                 + (FULL) carry.sihigh;
1600                         hd[-1] = sival1.silow;
1601                         hd++;
1602                 }
1603                 carry.ivalue += (FULL) topdigit;
1604                 hd[-1] = carry.silow;
1605                 topdigit = carry.sihigh;
1606         }
1607         i = modlen - z1.len;
1608         while (i-- > 0) {
1609                 h3 = rp->mod.v;
1610                 hd = res->v;
1611                 muln = (HALF) (*hd * Ninv);
1612                 sival1.ivalue = muln * ((FULL) *h3++) + (FULL) *hd++;
1613                 carry.ivalue = (FULL) sival1.sihigh;
1614                 j = modlen;
1615                 while (--j > 0) {
1616                         sival1.ivalue = muln * ((FULL) *h3++)
1617                                 + (FULL) *hd
1618                                 + (FULL) carry.silow;
1619                         carry.ivalue = (FULL) sival1.sihigh
1620                                 + (FULL) carry.sihigh;
1621                         hd[-1] = sival1.silow;
1622                         hd++;
1623                 }
1624                 carry.ivalue += (FULL) topdigit;
1625                 hd[-1] = carry.silow;
1626                 topdigit = carry.sihigh;
1627         }
1628         if (topdigit == 0) {
1629                 len = modlen;
1630                 while (*--hd == 0 && len > 1) {
1631                         len--;
1632                 }
1633                 res->len = len;
1634                 if  (zrel(*res, rp->mod) < 0) {
1635                         if (ztmp.len)
1636                                 zfree(ztmp);
1637                         return;
1638                 }
1639         }
1640
1641         carry.ivalue = 0;
1642         h1 = rp->mod.v;
1643         hd = res->v;
1644         len = modlen;
1645         while (len--) {
1646                 carry.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++)
1647                         + ((FULL) carry.silow);
1648                 *hd++ = (HALF)(BASE1 - carry.silow);
1649                 carry.silow = carry.sihigh;
1650         }
1651
1652         len = modlen;
1653         hd = &res->v[len - 1];
1654         while ((*hd == 0) && (len > 1)) {
1655                 hd--;
1656                 len--;
1657         }
1658         res->len = len;
1659         if (ztmp.len)
1660                 zfree(ztmp);
1661 }
1662
1663
1664 /*
1665  * Compute the result of raising a REDC format number to a power.
1666  * The result is within the range 0 to the modulus - 1.
1667  * This calculates the result by examining the power POWBITS bits at a time,
1668  * using a small table of POWNUMS low powers to calculate powers for those bits,
1669  * and repeated squaring and multiplying by the partial powers to generate
1670  * the complete power.
1671  *
1672  * given:
1673  *      rp              REDC information
1674  *      z1              REDC number to be raised
1675  *      z2              normal number to raise number to
1676  *      res             result
1677  */
1678 void
1679 zredcpower(REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res)
1680 {
1681         HALF *hp;               /* pointer to current word of the power */
1682         ZVALUE *pp;             /* pointer to low power table */
1683         ZVALUE ans, temp;       /* calculation values */
1684         ZVALUE ztmp;
1685         ZVALUE modpow;          /* current small power */
1686         ZVALUE lowpowers[POWNUMS];      /* low powers */
1687         int curshift;           /* shift value for word of power */
1688         HALF curhalf;           /* current word of power */
1689         unsigned int curpow;    /* current low power */
1690         unsigned int curbit;    /* current bit of low power */
1691         int sign;
1692         int i;
1693
1694         if (zisneg(z2)) {
1695                 math_error("Negative power in zredcpower");
1696                 /*NOTREACHED*/
1697         }
1698
1699         if (zisunit(rp->mod)) {
1700                 *res = _zero_;
1701                 return;
1702         }
1703
1704         sign = zisodd(z2) ? z1.sign : 0;
1705         z1.sign = 0;
1706         ztmp.len = 0;
1707         if (zrel(z1, rp->mod) >= 0) {
1708                 zmod(z1, rp->mod, &ztmp, 0);
1709                 z1 = ztmp;
1710         }
1711         /*
1712          * Check for zero or the REDC format for one.
1713          */
1714         if (ziszero(z1)) {
1715                 if (ziszero(z2))
1716                         *res = _one_;
1717                 else
1718                         *res = _zero_;
1719                 if (ztmp.len)
1720                         zfree(ztmp);
1721                 return;
1722         }
1723         if (zcmp(z1, rp->one) == 0) {
1724                 if (sign)
1725                         zsub(rp->mod, rp->one, res);
1726                 else
1727                         zcopy(rp->one, res);
1728                 if (ztmp.len)
1729                         zfree(ztmp);
1730                 return;
1731         }
1732
1733         /*
1734          * See if the number being raised is the REDC format for -1.
1735          * If so, then the answer is the REDC format for one or minus one.
1736          * To do this check, calculate the REDC format for -1.
1737          */
1738         if (((HALF)(z1.v[0] + rp->one.v[0])) == rp->mod.v[0]) {
1739                 zsub(rp->mod, rp->one, &temp);
1740                 if (zcmp(z1, temp) == 0) {
1741                         if (zisodd(z2) ^ sign) {
1742                                 *res = temp;
1743                                 if (ztmp.len)
1744                                         zfree(ztmp);
1745                                 return;
1746                         }
1747                         zfree(temp);
1748                         zcopy(rp->one, res);
1749                         if (ztmp.len)
1750                                 zfree(ztmp);
1751                         return;
1752                 }
1753                 zfree(temp);
1754         }
1755
1756         for (pp = &lowpowers[2]; pp < &lowpowers[POWNUMS]; pp++)
1757                 pp->len = 0;
1758         zcopy(rp->one, &lowpowers[0]);
1759         zcopy(z1, &lowpowers[1]);
1760         zcopy(rp->one, &ans);
1761
1762         hp = &z2.v[z2.len - 1];
1763         curhalf = *hp;
1764         curshift = BASEB - POWBITS;
1765         while (curshift && ((curhalf >> curshift) == 0))
1766                 curshift -= POWBITS;
1767
1768         /*
1769          * Calculate the result by examining the power POWBITS bits at a time,
1770          * and use the table of low powers at each iteration.
1771          */
1772         for (;;) {
1773                 curpow = (curhalf >> curshift) & (POWNUMS - 1);
1774                 pp = &lowpowers[curpow];
1775
1776                 /*
1777                  * If the small power is not yet saved in the table, then
1778                  * calculate it and remember it in the table for future use.
1779                  */
1780                 if (pp->len == 0) {
1781                         if (curpow & 0x1)
1782                                 zcopy(z1, &modpow);
1783                         else
1784                                 zcopy(rp->one, &modpow);
1785
1786                         for (curbit = 0x2; curbit <= curpow; curbit *= 2) {
1787                                 pp = &lowpowers[curbit];
1788                                 if (pp->len == 0)
1789                                         zredcsquare(rp, lowpowers[curbit/2],
1790                                                 pp);
1791                                 if (curbit & curpow) {
1792                                         zredcmul(rp, *pp, modpow, &temp);
1793                                         zfree(modpow);
1794                                         modpow = temp;
1795                                 }
1796                         }
1797                         pp = &lowpowers[curpow];
1798                         if (pp->len > 0) {
1799                                 zfree(*pp);
1800                         }
1801                         *pp = modpow;
1802                 }
1803
1804                 /*
1805                  * If the power is nonzero, then accumulate the small power
1806                  * into the result.
1807                  */
1808                 if (curpow) {
1809                         zredcmul(rp, ans, *pp, &temp);
1810                         zfree(ans);
1811                         ans = temp;
1812                 }
1813
1814                 /*
1815                  * Select the next POWBITS bits of the power, if there is
1816                  * any more to generate.
1817                  */
1818                 curshift -= POWBITS;
1819                 if (curshift < 0) {
1820                         if (hp-- == z2.v)
1821                                 break;
1822                         curhalf = *hp;
1823                         curshift = BASEB - POWBITS;
1824                 }
1825
1826                 /*
1827                  * Square the result POWBITS times to make room for the next
1828                  * chunk of bits.
1829                  */
1830                 for (i = 0; i < POWBITS; i++) {
1831                         zredcsquare(rp, ans, &temp);
1832                         zfree(ans);
1833                         ans = temp;
1834                 }
1835         }
1836
1837         for (pp = lowpowers; pp < &lowpowers[POWNUMS]; pp++) {
1838                 if (pp->len)
1839                         freeh(pp->v);
1840         }
1841         if (sign && !ziszero(ans)) {
1842                 zsub(rp->mod, ans, res);
1843                 zfree(ans);
1844         } else {
1845                 *res = ans;
1846         }
1847         if (ztmp.len)
1848                 zfree(ztmp);
1849 }
1850
1851
1852 /*
1853  * zhnrmod - compute z mod h*2^n+r
1854  *
1855  * We compute v mod h*2^n+r, where h>0, n>0, abs(r) <= 1, as follows:
1856  *
1857  *      Let v = b*2^n + a, where 0 <= a < 2^n
1858  *
1859  *      Now v mod h*2^n+r == b*2^n + a mod h*2^n+r,
1860  *      and thus v mod h*2^n+r == b*2^n mod h*2^n+r + a mod h*2^n+r.
1861  *
1862  *      Because 0 <= a < 2^n < h*2^n+r, a mod h*2^n+r == a.
1863  *      Thus v mod h*2^n+r == b*2^n mod h*2^n+r + a.
1864  *
1865  *      It can be shown that b*2^n mod h*2^n == 2^n * (b mod h).
1866  *
1867  *      Thus for r == 0, v mod h*2^n+r == (2^n)*(b mod h) + a.
1868  *
1869  *      It can be shown that v mod 2^n-1 == a+b mod 2^n-1.
1870  *
1871  *      Thus for r == -1, v mod h*2^n+r == (2^n)*(b mod h) + a + int(b/h).
1872  *
1873  *      It can be shown that v mod 2^n+1 == a-b mod 2^n+1.
1874  *
1875  *      Thus for r == +1, v mod h*2^n+r == (2^n)*(b mod h) + a - int(b/h).
1876  *
1877  *      Therefore, v mod h*2^n+r == (2^n)*(b mod h) + a - r*int(b/h).
1878  *
1879  * The above proof leads to the following calc resource file which computes
1880  * the value z mod h*2^n+r:
1881  *
1882  *    define hnrmod(v,h,n,r)
1883  *    {
1884  *      local a,b,modulus,tquo,tmod,lbit,ret;
1885  *
1886  *      if (!isint(h) || h < 1) {
1887  *          quit "h must be an integer be > 0";
1888  *      }
1889  *      if (!isint(n) || n < 1) {
1890  *          quit "n must be an integer be > 0";
1891  *      }
1892  *      if (r != 1 && r != 0 && r != -1) {
1893  *          quit "r must be -1, 0 or 1";
1894  *      }
1895  *
1896  *      lbit = lowbit(h);
1897  *      if (lbit > 0) {
1898  *          n += lbit;
1899  *          h >>= lbit;
1900  *      }
1901  *
1902  *      modulus = h<<n+r;
1903  *      if (modulus <= 2^31-1) {
1904  *          return v % modulus;
1905  *      }
1906  *      ret = v;
1907  *
1908  *      do {
1909  *          if (highbit(ret) < n) {
1910  *              break;
1911  *          }
1912  *          b = ret>>n;
1913  *          a = ret - (b<<n);
1914  *
1915  *          switch (r) {
1916  *          case -1:
1917  *              if (h == 1) {
1918  *                  ret = a + b;
1919  *              } else {
1920  *                  quomod(b, h, tquo, tmod);
1921  *                  ret = tmod<<n + a + tquo;
1922  *              }
1923  *              break;
1924  *          case 0:
1925  *              if (h == 1) {
1926  *                  ret = a;
1927  *              } else {
1928  *                  ret = (b%h)<<n + a;
1929  *              }
1930  *              break;
1931  *          case 1:
1932  *              if (h == 1) {
1933  *                  ret = ((a > b) ? a-b : modulus+a-b);
1934  *              } else {
1935  *                  quomod(b, h, tquo, tmod);
1936  *                  tmod = tmod<<n + a;
1937  *                  ret = ((tmod >= tquo) ? tmod-tquo : modulus+tmod-tquo);
1938  *              }
1939  *              break;
1940  *          }
1941  *      } while (ret > modulus);
1942  *      ret = ((ret < 0) ? ret+modlus : ((ret == modulus) ? 0 : ret));
1943  *
1944  *      return ret;
1945  *    }
1946  *
1947  * This function implements the above calc resource file.
1948  *
1949  * given:
1950  *      v               take mod of this value, v >= 0
1951  *      zh              h from modulus h*2^n+r, h > 0
1952  *      zn              n from modulus h*2^n+r, n > 0
1953  *      zr              r from modulus h*2^n+r, abs(r) <= 1
1954  *      res             v mod h*2^n+r
1955  */
1956 void
1957 zhnrmod(ZVALUE v, ZVALUE zh, ZVALUE zn, ZVALUE zr, ZVALUE *res)
1958 {
1959         ZVALUE a;               /* lower n bits of v */
1960         ZVALUE b;               /* bits above the lower n bits of v */
1961         ZVALUE h;               /* working zh value */
1962         ZVALUE modulus;         /* h^2^n + r */
1963         ZVALUE tquo;            /* b // h */
1964         ZVALUE tmod;            /* b % h or (b%h)<<n + a */
1965         ZVALUE t;               /* temp ZVALUE */
1966         ZVALUE t2;              /* temp ZVALUE */
1967         ZVALUE ret;             /* return value, what *res is set to */
1968         long n;                 /* integer value of zn */
1969         long r;                 /* integer value of zr */
1970         long hbit;              /* highbit(res) */
1971         long lbit;              /* lowbit(h) */
1972         int zrelval;            /* return value of zrel() */
1973         int hisone;             /* 1 => h == 1, 0 => h != 1 */
1974
1975         /*
1976          * firewall
1977          */
1978         if (zisneg(zh) || ziszero(zh)) {
1979                 math_error("h must be > 0");
1980                 /*NOTREACHED*/
1981         }
1982         if (zisneg(zn) || ziszero(zn)) {
1983                 math_error("n must be > 0");
1984                 /*NOTREACHED*/
1985         }
1986         if (zge31b(zn)) {
1987                 math_error("n must be < 2^31");
1988                 /*NOTREACHED*/
1989         }
1990         if (!zisabsleone(zr)) {
1991                 math_error("r must be -1, 0 or 1");
1992                 /*NOTREACHED*/
1993         }
1994
1995
1996         /*
1997          * setup for loop
1998          */
1999         n = ztolong(zn);
2000         r = ztolong(zr);
2001         if (zisneg(zr)) {
2002                 r = -r;
2003         }
2004         /* lbit = lowbit(h); */
2005         lbit = zlowbit(zh);
2006         /* if (lbit > 0) { n += lbit; h >>= lbit; } */
2007         if (lbit > 0) {
2008                 n += lbit;
2009                 zshift(zh, -lbit, &h);
2010         } else {
2011                 h = zh;
2012         }
2013         /* modulus = h<<n+r; */
2014         zshift(h, n, &t);
2015         switch (r) {
2016         case 1:
2017                 zadd(t, _one_, &modulus);
2018                 zfree(t);
2019                 break;
2020         case 0:
2021                 modulus = t;
2022                 break;
2023         case -1:
2024                 zsub(t, _one_, &modulus);
2025                 zfree(t);
2026                 break;
2027         }
2028         /* if (modulus <= MAXLONG) { return v % modulus; } */
2029         if (!zgtmaxlong(modulus)) {
2030                 itoz(zmodi(v, ztolong(modulus)), res);
2031                 zfree(modulus);
2032                 if (lbit > 0) {
2033                         zfree(h);
2034                 }
2035                 return;
2036         }
2037         /* ret = v; */
2038         zcopy(v, &ret);
2039
2040         /*
2041          * shift-add modulus loop
2042          */
2043         hisone = zisone(h);
2044         do {
2045
2046                 /*
2047                  * split ret into to chunks, the lower n bits
2048                  * and everything above the lower n bits
2049                  */
2050                 /* if (highbit(ret) < n) { break; } */
2051                 hbit = (long)zhighbit(ret);
2052                 if (hbit < n) {
2053                         zrelval = (zcmp(ret, modulus) ? -1 : 0);
2054                         break;
2055                 }
2056                 /* b = ret>>n; */
2057                 zshift(ret, -n, &b);
2058                 b.sign = ret.sign;
2059                 /* a = ret - (b<<n); */
2060                 a.sign = ret.sign;
2061                 a.len = (n+BASEB-1)/BASEB;
2062                 a.v = alloc(a.len);
2063                 memcpy(a.v, ret.v, a.len*sizeof(HALF));
2064                 if (n % BASEB) {
2065                         a.v[a.len - 1] &= lowhalf[n % BASEB];
2066                 }
2067                 ztrim(&a);
2068
2069                 /*
2070                  * switch depending on r == -1, 0 or 1
2071                  */
2072                 switch (r) {
2073                 case -1:        /* v mod h*2^h-1 */
2074                         /* if (h == 1) ... */
2075                         if (hisone) {
2076                                 /* ret = a + b; */
2077                                 zfree(ret);
2078                                 zadd(a, b, &ret);
2079
2080                         /* ... else ... */
2081                         } else {
2082                                 /* quomod(b, h, tquo, tmod); */
2083                                 (void) zdiv(b, h, &tquo, &tmod, 0);
2084                                 /* ret = tmod<<n + a + tquo; */
2085                                 zshift(tmod, n, &t);
2086                                 zfree(tmod);
2087                                 zadd(a, tquo, &t2);
2088                                 zfree(tquo);
2089                                 zfree(ret);
2090                                 zadd(t, t2, &ret);
2091                                 zfree(t);
2092                                 zfree(t2);
2093                         }
2094                         break;
2095
2096                 case 0:         /* v mod h*2^h-1 */
2097                         /* if (h == 1) ... */
2098                         if (hisone) {
2099                                 /* ret = a; */
2100                                 zfree(ret);
2101                                 zcopy(a, &ret);
2102
2103                         /* ... else ... */
2104                         } else {
2105                                 /* ret = (b%h)<<n + a; */
2106                                 (void) zmod(b, h, &tmod, 0);
2107                                 zshift(tmod, n, &t);
2108                                 zfree(tmod);
2109                                 zfree(ret);
2110                                 zadd(t, a, &ret);
2111                                 zfree(t);
2112                         }
2113                         break;
2114
2115                 case 1:         /* v mod h*2^h-1 */
2116                         /* if (h == 1) ... */
2117                         if (hisone) {
2118                                 /* ret = a-b; */
2119                                 zfree(ret);
2120                                 zsub(a, b, &ret);
2121
2122                         /* ... else ... */
2123                         } else {
2124                                 /* quomod(b, h, tquo, tmod); */
2125                                 (void) zdiv(b, h, &tquo, &tmod, 0);
2126                                 /* tmod = tmod<<n + a; */
2127                                 zshift(tmod, n, &t);
2128                                 zfree(tmod);
2129                                 zadd(t, a, &tmod);
2130                                 zfree(t);
2131                                 /* ret = tmod-tquo; */
2132                                 zfree(ret);
2133                                 zsub(tmod, tquo, &ret);
2134                                 zfree(tquo);
2135                                 zfree(tmod);
2136                         }
2137                         break;
2138                 }
2139                 zfree(a);
2140                 zfree(b);
2141
2142         /* ... while (abs(ret) > modulus); */
2143         } while ((zrelval = zabsrel(ret, modulus)) > 0);
2144         /* ret = ((ret < 0) ? ret+modlus : ((ret == modulus) ? 0 : ret)); */
2145         if (ret.sign) {
2146                 zadd(ret, modulus, &t);
2147                 zfree(ret);
2148                 ret = t;
2149         } else if (zrelval == 0) {
2150                 zfree(ret);
2151                 ret = _zero_;
2152         }
2153         zfree(modulus);
2154         if (lbit > 0) {
2155                 zfree(h);
2156         }
2157
2158         /*
2159          * return ret
2160          */
2161         *res = ret;
2162         return;
2163 }