security/nss/lib/freebl/mpi/mpi.c

   1 /*
   2  *  mpi.c
   3  *
   4  *  Arbitrary precision integer arithmetic library
   5  *
   6  * ***** BEGIN LICENSE BLOCK *****
   7  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   8  *
   9  * The contents of this file are subject to the Mozilla Public License Version
  10  * 1.1 (the "License"); you may not use this file except in compliance with
  11  * the License. You may obtain a copy of the License at
  12  * http://www.mozilla.org/MPL/
  13  *
  14  * Software distributed under the License is distributed on an "AS IS" basis,
  15  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  16  * for the specific language governing rights and limitations under the
  17  * License.
  18  *
  19  * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
  20  *
  21  * The Initial Developer of the Original Code is
  22  * Michael J. Fromberger.
  23  * Portions created by the Initial Developer are Copyright (C) 1998
  24  * the Initial Developer. All Rights Reserved.
  25  *
  26  * Contributor(s):
  27  *   Netscape Communications Corporation
  28  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
  29  *
  30  * Alternatively, the contents of this file may be used under the terms of
  31  * either the GNU General Public License Version 2 or later (the "GPL"), or
  32  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  33  * in which case the provisions of the GPL or the LGPL are applicable instead
  34  * of those above. If you wish to allow use of your version of this file only
  35  * under the terms of either the GPL or the LGPL, and not to allow others to
  36  * use your version of this file under the terms of the MPL, indicate your
  37  * decision by deleting the provisions above and replace them with the notice
  38  * and other provisions required by the GPL or the LGPL. If you do not delete
  39  * the provisions above, a recipient may use your version of this file under
  40  * the terms of any one of the MPL, the GPL or the LGPL.
  41  *
  42  * ***** END LICENSE BLOCK ***** */
  43 /* $Id: mpi.c,v 1.45 2006/09/29 20:12:21 alexei.volkov.bugs%sun.com Exp $ */
  44
  45 #include "mpi-priv.h"
  46 #if defined(OSF1)
  47 #include <c_asm.h>
  48 #endif
  49
  50 #if MP_LOGTAB
  51 /*
  52   A table of the logs of 2 for various bases (the 0 and 1 entries of
  53   this table are meaningless and should not be referenced).
  54
  55   This table is used to compute output lengths for the mp_toradix()
  56   function.  Since a number n in radix r takes up about log_r(n)
  57   digits, we estimate the output size by taking the least integer
  58   greater than log_r(n), where:
  59
  60   log_r(n) = log_2(n) * log_r(2)
  61
  62   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
  63   which are the output bases supported.
  64  */
  65 #include "logtab.h"
  66 #endif
  67
  68 /* {{{ Constant strings */
  69
  70 /* Constant strings returned by mp_strerror() */
  71 static const char *mp_err_string[] = {
  72   "unknown result code",     /* say what?            */
  73   "boolean true",            /* MP_OKAY, MP_YES      */
  74   "boolean false",           /* MP_NO                */
  75   "out of memory",           /* MP_MEM               */
  76   "argument out of range",   /* MP_RANGE             */
  77   "invalid input parameter", /* MP_BADARG            */
  78   "result is undefined"      /* MP_UNDEF             */
  79 };
  80
  81 /* Value to digit maps for radix conversion   */
  82
  83 /* s_dmap_1 - standard digits and letters */
  84 static const char *s_dmap_1 =
  85   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
  86
  87 /* }}} */
  88
  89 unsigned long mp_allocs;
  90 unsigned long mp_frees;
  91 unsigned long mp_copies;
  92
  93 /* {{{ Default precision manipulation */
  94
  95 /* Default precision for newly created mp_int's      */
  96 static mp_size s_mp_defprec = MP_DEFPREC;
  97
  98 mp_size mp_get_prec(void)
  99 {
 100   return s_mp_defprec;
 101
 102 } /* end mp_get_prec() */
 103
 104 void         mp_set_prec(mp_size prec)
 105 {
 106   if(prec == 0)
 107     s_mp_defprec = MP_DEFPREC;
 108   else
 109     s_mp_defprec = prec;
 110
 111 } /* end mp_set_prec() */
 112
 113 /* }}} */
 114
 115 /*------------------------------------------------------------------------*/
 116 /* {{{ mp_init(mp) */
 117
 118 /*
 119   mp_init(mp)
 120
 121   Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
 122   MP_MEM if memory could not be allocated for the structure.
 123  */
 124
 125 mp_err mp_init(mp_int *mp)
 126 {
 127   return mp_init_size(mp, s_mp_defprec);
 128
 129 } /* end mp_init() */
 130
 131 /* }}} */
 132
 133 /* {{{ mp_init_size(mp, prec) */
 134
 135 /*
 136   mp_init_size(mp, prec)
 137
 138   Initialize a new zero-valued mp_int with at least the given
 139   precision; returns MP_OKAY if successful, or MP_MEM if memory could
 140   not be allocated for the structure.
 141  */
 142
 143 mp_err mp_init_size(mp_int *mp, mp_size prec)
 144 {
 145   ARGCHK(mp != NULL && prec > 0, MP_BADARG);
 146
 147   prec = MP_ROUNDUP(prec, s_mp_defprec);
 148   if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
 149     return MP_MEM;
 150
 151   SIGN(mp) = ZPOS;
 152   USED(mp) = 1;
 153   ALLOC(mp) = prec;
 154
 155   return MP_OKAY;
 156
 157 } /* end mp_init_size() */
 158
 159 /* }}} */
 160
 161 /* {{{ mp_init_copy(mp, from) */
 162
 163 /*
 164   mp_init_copy(mp, from)
 165
 166   Initialize mp as an exact copy of from.  Returns MP_OKAY if
 167   successful, MP_MEM if memory could not be allocated for the new
 168   structure.
 169  */
 170
 171 mp_err mp_init_copy(mp_int *mp, const mp_int *from)
 172 {
 173   ARGCHK(mp != NULL && from != NULL, MP_BADARG);
 174
 175   if(mp == from)
 176     return MP_OKAY;
 177
 178   if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
 179     return MP_MEM;
 180
 181   s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
 182   USED(mp) = USED(from);
 183   ALLOC(mp) = ALLOC(from);
 184   SIGN(mp) = SIGN(from);
 185
 186   return MP_OKAY;
 187
 188 } /* end mp_init_copy() */
 189
 190 /* }}} */
 191
 192 /* {{{ mp_copy(from, to) */
 193
 194 /*
 195   mp_copy(from, to)
 196
 197   Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
 198   'to' has already been initialized (if not, use mp_init_copy()
 199   instead). If 'from' and 'to' are identical, nothing happens.
 200  */
 201
 202 mp_err mp_copy(const mp_int *from, mp_int *to)
 203 {
 204   ARGCHK(from != NULL && to != NULL, MP_BADARG);
 205
 206   if(from == to)
 207     return MP_OKAY;
 208
 209   ++mp_copies;
 210   { /* copy */
 211     mp_digit   *tmp;
 212
 213     /*
 214       If the allocated buffer in 'to' already has enough space to hold
 215       all the used digits of 'from', we'll re-use it to avoid hitting
 216       the memory allocater more than necessary; otherwise, we'd have
 217       to grow anyway, so we just allocate a hunk and make the copy as
 218       usual
 219      */
 220     if(ALLOC(to) >= USED(from)) {
 221       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
 222       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
 223
 224     } else {
 225       if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
 226         return MP_MEM;
 227
 228       s_mp_copy(DIGITS(from), tmp, USED(from));
 229
 230       if(DIGITS(to) != NULL) {
 231 #if MP_CRYPTO
 232         s_mp_setz(DIGITS(to), ALLOC(to));
 233 #endif
 234         s_mp_free(DIGITS(to));
 235       }
 236
 237       DIGITS(to) = tmp;
 238       ALLOC(to) = ALLOC(from);
 239     }
 240
 241     /* Copy the precision and sign from the original */
 242     USED(to) = USED(from);
 243     SIGN(to) = SIGN(from);
 244   } /* end copy */
 245
 246   return MP_OKAY;
 247
 248 } /* end mp_copy() */
 249
 250 /* }}} */
 251
 252 /* {{{ mp_exch(mp1, mp2) */
 253
 254 /*
 255   mp_exch(mp1, mp2)
 256
 257   Exchange mp1 and mp2 without allocating any intermediate memory
 258   (well, unless you count the stack space needed for this call and the
 259   locals it creates...).  This cannot fail.
 260  */
 261
 262 void mp_exch(mp_int *mp1, mp_int *mp2)
 263 {
 264 #if MP_ARGCHK == 2
 265   assert(mp1 != NULL && mp2 != NULL);
 266 #else
 267   if(mp1 == NULL || mp2 == NULL)
 268     return;
 269 #endif
 270
 271   s_mp_exch(mp1, mp2);
 272
 273 } /* end mp_exch() */
 274
 275 /* }}} */
 276
 277 /* {{{ mp_clear(mp) */
 278
 279 /*
 280   mp_clear(mp)
 281
 282   Release the storage used by an mp_int, and void its fields so that
 283   if someone calls mp_clear() again for the same int later, we won't
 284   get tollchocked.
 285  */
 286
 287 void   mp_clear(mp_int *mp)
 288 {
 289   if(mp == NULL)
 290     return;
 291
 292   if(DIGITS(mp) != NULL) {
 293 #if MP_CRYPTO
 294     s_mp_setz(DIGITS(mp), ALLOC(mp));
 295 #endif
 296     s_mp_free(DIGITS(mp));
 297     DIGITS(mp) = NULL;
 298   }
 299
 300   USED(mp) = 0;
 301   ALLOC(mp) = 0;
 302
 303 } /* end mp_clear() */
 304
 305 /* }}} */
 306
 307 /* {{{ mp_zero(mp) */
 308
 309 /*
 310   mp_zero(mp)
 311
 312   Set mp to zero.  Does not change the allocated size of the structure,
 313   and therefore cannot fail (except on a bad argument, which we ignore)
 314  */
 315 void   mp_zero(mp_int *mp)
 316 {
 317   if(mp == NULL)
 318     return;
 319
 320   s_mp_setz(DIGITS(mp), ALLOC(mp));
 321   USED(mp) = 1;
 322   SIGN(mp) = ZPOS;
 323
 324 } /* end mp_zero() */
 325
 326 /* }}} */
 327
 328 /* {{{ mp_set(mp, d) */
 329
 330 void   mp_set(mp_int *mp, mp_digit d)
 331 {
 332   if(mp == NULL)
 333     return;
 334
 335   mp_zero(mp);
 336   DIGIT(mp, 0) = d;
 337
 338 } /* end mp_set() */
 339
 340 /* }}} */
 341
 342 /* {{{ mp_set_int(mp, z) */
 343
 344 mp_err mp_set_int(mp_int *mp, long z)
 345 {
 346   int            ix;
 347   unsigned long  v = labs(z);
 348   mp_err         res;
 349
 350   ARGCHK(mp != NULL, MP_BADARG);
 351
 352   mp_zero(mp);
 353   if(z == 0)
 354     return MP_OKAY;  /* shortcut for zero */
 355
 356   if (sizeof v <= sizeof(mp_digit)) {
 357     DIGIT(mp,0) = v;
 358   } else {
 359     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 360       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 361         return res;
 362
 363       res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
 364       if (res != MP_OKAY)
 365         return res;
 366     }
 367   }
 368   if(z < 0)
 369     SIGN(mp) = NEG;
 370
 371   return MP_OKAY;
 372
 373 } /* end mp_set_int() */
 374
 375 /* }}} */
 376
 377 /* {{{ mp_set_ulong(mp, z) */
 378
 379 mp_err mp_set_ulong(mp_int *mp, unsigned long z)
 380 {
 381   int            ix;
 382   mp_err         res;
 383
 384   ARGCHK(mp != NULL, MP_BADARG);
 385
 386   mp_zero(mp);
 387   if(z == 0)
 388     return MP_OKAY;  /* shortcut for zero */
 389
 390   if (sizeof z <= sizeof(mp_digit)) {
 391     DIGIT(mp,0) = z;
 392   } else {
 393     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 394       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 395         return res;
 396
 397       res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
 398       if (res != MP_OKAY)
 399         return res;
 400     }
 401   }
 402   return MP_OKAY;
 403 } /* end mp_set_ulong() */
 404
 405 /* }}} */
 406
 407 /*------------------------------------------------------------------------*/
 408 /* {{{ Digit arithmetic */
 409
 410 /* {{{ mp_add_d(a, d, b) */
 411
 412 /*
 413   mp_add_d(a, d, b)
 414
 415   Compute the sum b = a + d, for a single digit d.  Respects the sign of
 416   its primary addend (single digits are unsigned anyway).
 417  */
 418
 419 mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
 420 {
 421   mp_int   tmp;
 422   mp_err   res;
 423
 424   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 425
 426   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 427     return res;
 428
 429   if(SIGN(&tmp) == ZPOS) {
 430     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 431       goto CLEANUP;
 432   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 433     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 434       goto CLEANUP;
 435   } else {
 436     mp_neg(&tmp, &tmp);
 437
 438     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 439   }
 440
 441   if(s_mp_cmp_d(&tmp, 0) == 0)
 442     SIGN(&tmp) = ZPOS;
 443
 444   s_mp_exch(&tmp, b);
 445
 446 CLEANUP:
 447   mp_clear(&tmp);
 448   return res;
 449
 450 } /* end mp_add_d() */
 451
 452 /* }}} */
 453
 454 /* {{{ mp_sub_d(a, d, b) */
 455
 456 /*
 457   mp_sub_d(a, d, b)
 458
 459   Compute the difference b = a - d, for a single digit d.  Respects the
 460   sign of its subtrahend (single digits are unsigned anyway).
 461  */
 462
 463 mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
 464 {
 465   mp_int   tmp;
 466   mp_err   res;
 467
 468   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 469
 470   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 471     return res;
 472
 473   if(SIGN(&tmp) == NEG) {
 474     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 475       goto CLEANUP;
 476   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 477     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 478       goto CLEANUP;
 479   } else {
 480     mp_neg(&tmp, &tmp);
 481
 482     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 483     SIGN(&tmp) = NEG;
 484   }
 485
 486   if(s_mp_cmp_d(&tmp, 0) == 0)
 487     SIGN(&tmp) = ZPOS;
 488
 489   s_mp_exch(&tmp, b);
 490
 491 CLEANUP:
 492   mp_clear(&tmp);
 493   return res;
 494
 495 } /* end mp_sub_d() */
 496
 497 /* }}} */
 498
 499 /* {{{ mp_mul_d(a, d, b) */
 500
 501 /*
 502   mp_mul_d(a, d, b)
 503
 504   Compute the product b = a * d, for a single digit d.  Respects the sign
 505   of its multiplicand (single digits are unsigned anyway)
 506  */
 507
 508 mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
 509 {
 510   mp_err  res;
 511
 512   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 513
 514   if(d == 0) {
 515     mp_zero(b);
 516     return MP_OKAY;
 517   }
 518
 519   if((res = mp_copy(a, b)) != MP_OKAY)
 520     return res;
 521
 522   res = s_mp_mul_d(b, d);
 523
 524   return res;
 525
 526 } /* end mp_mul_d() */
 527
 528 /* }}} */
 529
 530 /* {{{ mp_mul_2(a, c) */
 531
 532 mp_err mp_mul_2(const mp_int *a, mp_int *c)
 533 {
 534   mp_err  res;
 535
 536   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 537
 538   if((res = mp_copy(a, c)) != MP_OKAY)
 539     return res;
 540
 541   return s_mp_mul_2(c);
 542
 543 } /* end mp_mul_2() */
 544
 545 /* }}} */
 546
 547 /* {{{ mp_div_d(a, d, q, r) */
 548
 549 /*
 550   mp_div_d(a, d, q, r)
 551
 552   Compute the quotient q = a / d and remainder r = a mod d, for a
 553   single digit d.  Respects the sign of its divisor (single digits are
 554   unsigned anyway).
 555  */
 556
 557 mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
 558 {
 559   mp_err   res;
 560   mp_int   qp;
 561   mp_digit rem;
 562   int      pow;
 563
 564   ARGCHK(a != NULL, MP_BADARG);
 565
 566   if(d == 0)
 567     return MP_RANGE;
 568
 569   /* Shortcut for powers of two ... */
 570   if((pow = s_mp_ispow2d(d)) >= 0) {
 571     mp_digit  mask;
 572
 573     mask = ((mp_digit)1 << pow) - 1;
 574     rem = DIGIT(a, 0) & mask;
 575
 576     if(q) {
 577       mp_copy(a, q);
 578       s_mp_div_2d(q, pow);
 579     }
 580
 581     if(r)
 582       *r = rem;
 583
 584     return MP_OKAY;
 585   }
 586
 587   if((res = mp_init_copy(&qp, a)) != MP_OKAY)
 588     return res;
 589
 590   res = s_mp_div_d(&qp, d, &rem);
 591
 592   if(s_mp_cmp_d(&qp, 0) == 0)
 593     SIGN(q) = ZPOS;
 594
 595   if(r)
 596     *r = rem;
 597
 598   if(q)
 599     s_mp_exch(&qp, q);
 600
 601   mp_clear(&qp);
 602   return res;
 603
 604 } /* end mp_div_d() */
 605
 606 /* }}} */
 607
 608 /* {{{ mp_div_2(a, c) */
 609
 610 /*
 611   mp_div_2(a, c)
 612
 613   Compute c = a / 2, disregarding the remainder.
 614  */
 615
 616 mp_err mp_div_2(const mp_int *a, mp_int *c)
 617 {
 618   mp_err  res;
 619
 620   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 621
 622   if((res = mp_copy(a, c)) != MP_OKAY)
 623     return res;
 624
 625   s_mp_div_2(c);
 626
 627   return MP_OKAY;
 628
 629 } /* end mp_div_2() */
 630
 631 /* }}} */
 632
 633 /* {{{ mp_expt_d(a, d, b) */
 634
 635 mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
 636 {
 637   mp_int   s, x;
 638   mp_err   res;
 639
 640   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 641
 642   if((res = mp_init(&s)) != MP_OKAY)
 643     return res;
 644   if((res = mp_init_copy(&x, a)) != MP_OKAY)
 645     goto X;
 646
 647   DIGIT(&s, 0) = 1;
 648
 649   while(d != 0) {
 650     if(d & 1) {
 651       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
 652         goto CLEANUP;
 653     }
 654
 655     d /= 2;
 656
 657     if((res = s_mp_sqr(&x)) != MP_OKAY)
 658       goto CLEANUP;
 659   }
 660
 661   s_mp_exch(&s, c);
 662
 663 CLEANUP:
 664   mp_clear(&x);
 665 X:
 666   mp_clear(&s);
 667
 668   return res;
 669
 670 } /* end mp_expt_d() */
 671
 672 /* }}} */
 673
 674 /* }}} */
 675
 676 /*------------------------------------------------------------------------*/
 677 /* {{{ Full arithmetic */
 678
 679 /* {{{ mp_abs(a, b) */
 680
 681 /*
 682   mp_abs(a, b)
 683
 684   Compute b = |a|.  'a' and 'b' may be identical.
 685  */
 686
 687 mp_err mp_abs(const mp_int *a, mp_int *b)
 688 {
 689   mp_err   res;
 690
 691   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 692
 693   if((res = mp_copy(a, b)) != MP_OKAY)
 694     return res;
 695
 696   SIGN(b) = ZPOS;
 697
 698   return MP_OKAY;
 699
 700 } /* end mp_abs() */
 701
 702 /* }}} */
 703
 704 /* {{{ mp_neg(a, b) */
 705
 706 /*
 707   mp_neg(a, b)
 708
 709   Compute b = -a.  'a' and 'b' may be identical.
 710  */
 711
 712 mp_err mp_neg(const mp_int *a, mp_int *b)
 713 {
 714   mp_err   res;
 715
 716   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 717
 718   if((res = mp_copy(a, b)) != MP_OKAY)
 719     return res;
 720
 721   if(s_mp_cmp_d(b, 0) == MP_EQ)
 722     SIGN(b) = ZPOS;
 723   else
 724     SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
 725
 726   return MP_OKAY;
 727
 728 } /* end mp_neg() */
 729
 730 /* }}} */
 731
 732 /* {{{ mp_add(a, b, c) */
 733
 734 /*
 735   mp_add(a, b, c)
 736
 737   Compute c = a + b.  All parameters may be identical.
 738  */
 739
 740 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
 741 {
 742   mp_err  res;
 743
 744   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 745
 746   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
 747     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 748   } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
 749     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 750   } else {                          /* different sign: |a|  < |b|   */
 751     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 752   }
 753
 754   if (s_mp_cmp_d(c, 0) == MP_EQ)
 755     SIGN(c) = ZPOS;
 756
 757 CLEANUP:
 758   return res;
 759
 760 } /* end mp_add() */
 761
 762 /* }}} */
 763
 764 /* {{{ mp_sub(a, b, c) */
 765
 766 /*
 767   mp_sub(a, b, c)
 768
 769   Compute c = a - b.  All parameters may be identical.
 770  */
 771
 772 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
 773 {
 774   mp_err  res;
 775   int     magDiff;
 776
 777   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 778
 779   if (a == b) {
 780     mp_zero(c);
 781     return MP_OKAY;
 782   }
 783
 784   if (MP_SIGN(a) != MP_SIGN(b)) {
 785     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 786   } else if (!(magDiff = s_mp_cmp(a, b))) {
 787     mp_zero(c);
 788     res = MP_OKAY;
 789   } else if (magDiff > 0) {
 790     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 791   } else {
 792     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 793     MP_SIGN(c) = !MP_SIGN(a);
 794   }
 795
 796   if (s_mp_cmp_d(c, 0) == MP_EQ)
 797     MP_SIGN(c) = MP_ZPOS;
 798
 799 CLEANUP:
 800   return res;
 801
 802 } /* end mp_sub() */
 803
 804 /* }}} */
 805
 806 /* {{{ mp_mul(a, b, c) */
 807
 808 /*
 809   mp_mul(a, b, c)
 810
 811   Compute c = a * b.  All parameters may be identical.
 812  */
 813 mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
 814 {
 815   mp_digit *pb;
 816   mp_int   tmp;
 817   mp_err   res;
 818   mp_size  ib;
 819   mp_size  useda, usedb;
 820
 821   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 822
 823   if (a == c) {
 824     if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 825       return res;
 826     if (a == b)
 827       b = &tmp;
 828     a = &tmp;
 829   } else if (b == c) {
 830     if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
 831       return res;
 832     b = &tmp;
 833   } else {
 834     MP_DIGITS(&tmp) = 0;
 835   }
 836
 837   if (MP_USED(a) < MP_USED(b)) {
 838     const mp_int *xch = b;      /* switch a and b, to do fewer outer loops */
 839     b = a;
 840     a = xch;
 841   }
 842
 843   MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
 844   if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
 845     goto CLEANUP;
 846
 847 #ifdef NSS_USE_COMBA
 848   if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
 849       if (MP_USED(a) == 4) {
 850           s_mp_mul_comba_4(a, b, c);
 851           goto CLEANUP;
 852       }
 853       if (MP_USED(a) == 8) {
 854           s_mp_mul_comba_8(a, b, c);
 855           goto CLEANUP;
 856       }
 857       if (MP_USED(a) == 16) {
 858           s_mp_mul_comba_16(a, b, c);
 859           goto CLEANUP;
 860       }
 861       if (MP_USED(a) == 32) {
 862           s_mp_mul_comba_32(a, b, c);
 863           goto CLEANUP;
 864       }
 865   }
 866 #endif
 867
 868   pb = MP_DIGITS(b);
 869   s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
 870
 871   /* Outer loop:  Digits of b */
 872   useda = MP_USED(a);
 873   usedb = MP_USED(b);
 874   for (ib = 1; ib < usedb; ib++) {
 875     mp_digit b_i    = *pb++;
 876
 877     /* Inner product:  Digits of a */
 878     if (b_i)
 879       s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
 880     else
 881       MP_DIGIT(c, ib + useda) = b_i;
 882   }
 883
 884   s_mp_clamp(c);
 885
 886   if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
 887     SIGN(c) = ZPOS;
 888   else
 889     SIGN(c) = NEG;
 890
 891 CLEANUP:
 892   mp_clear(&tmp);
 893   return res;
 894 } /* end mp_mul() */
 895
 896 /* }}} */
 897
 898 /* {{{ mp_sqr(a, sqr) */
 899
 900 #if MP_SQUARE
 901 /*
 902   Computes the square of a.  This can be done more
 903   efficiently than a general multiplication, because many of the
 904   computation steps are redundant when squaring.  The inner product
 905   step is a bit more complicated, but we save a fair number of
 906   iterations of the multiplication loop.
 907  */
 908
 909 /* sqr = a^2;   Caller provides both a and tmp; */
 910 mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
 911 {
 912   mp_digit *pa;
 913   mp_digit d;
 914   mp_err   res;
 915   mp_size  ix;
 916   mp_int   tmp;
 917   int      count;
 918
 919   ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
 920
 921   if (a == sqr) {
 922     if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 923       return res;
 924     a = &tmp;
 925   } else {
 926     DIGITS(&tmp) = 0;
 927     res = MP_OKAY;
 928   }
 929
 930   ix = 2 * MP_USED(a);
 931   if (ix > MP_ALLOC(sqr)) {
 932     MP_USED(sqr) = 1;
 933     MP_CHECKOK( s_mp_grow(sqr, ix) );
 934   }
 935   MP_USED(sqr) = ix;
 936   MP_DIGIT(sqr, 0) = 0;
 937
 938 #ifdef NSS_USE_COMBA
 939   if (IS_POWER_OF_2(MP_USED(a))) {
 940       if (MP_USED(a) == 4) {
 941           s_mp_sqr_comba_4(a, sqr);
 942           goto CLEANUP;
 943       }
 944       if (MP_USED(a) == 8) {
 945           s_mp_sqr_comba_8(a, sqr);
 946           goto CLEANUP;
 947       }
 948       if (MP_USED(a) == 16) {
 949           s_mp_sqr_comba_16(a, sqr);
 950           goto CLEANUP;
 951       }
 952       if (MP_USED(a) == 32) {
 953           s_mp_sqr_comba_32(a, sqr);
 954           goto CLEANUP;
 955       }
 956   }
 957 #endif
 958
 959   pa = MP_DIGITS(a);
 960   count = MP_USED(a) - 1;
 961   if (count > 0) {
 962     d = *pa++;
 963     s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
 964     for (ix = 3; --count > 0; ix += 2) {
 965       d = *pa++;
 966       s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
 967     } /* for(ix ...) */
 968     MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
 969
 970     /* now sqr *= 2 */
 971     s_mp_mul_2(sqr);
 972   } else {
 973     MP_DIGIT(sqr, 1) = 0;
 974   }
 975
 976   /* now add the squares of the digits of a to sqr. */
 977   s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
 978
 979   SIGN(sqr) = ZPOS;
 980   s_mp_clamp(sqr);
 981
 982 CLEANUP:
 983   mp_clear(&tmp);
 984   return res;
 985
 986 } /* end mp_sqr() */
 987 #endif
 988
 989 /* }}} */
 990
 991 /* {{{ mp_div(a, b, q, r) */
 992
 993 /*
 994   mp_div(a, b, q, r)
 995
 996   Compute q = a / b and r = a mod b.  Input parameters may be re-used
 997   as output parameters.  If q or r is NULL, that portion of the
 998   computation will be discarded (although it will still be computed)
 999  */
1000 mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1001 {
1002   mp_err   res;
1003   mp_int   *pQ, *pR;
1004   mp_int   qtmp, rtmp, btmp;
1005   int      cmp;
1006   mp_sign  signA;
1007   mp_sign  signB;
1008
1009   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1010
1011   signA = MP_SIGN(a);
1012   signB = MP_SIGN(b);
1013
1014   if(mp_cmp_z(b) == MP_EQ)
1015     return MP_RANGE;
1016
1017   DIGITS(&qtmp) = 0;
1018   DIGITS(&rtmp) = 0;
1019   DIGITS(&btmp) = 0;
1020
1021   /* Set up some temporaries... */
1022   if (!r || r == a || r == b) {
1023     MP_CHECKOK( mp_init_copy(&rtmp, a) );
1024     pR = &rtmp;
1025   } else {
1026     MP_CHECKOK( mp_copy(a, r) );
1027     pR = r;
1028   }
1029
1030   if (!q || q == a || q == b) {
1031     MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a)) );
1032     pQ = &qtmp;
1033   } else {
1034     MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
1035     pQ = q;
1036     mp_zero(pQ);
1037   }
1038
1039   /*
1040     If |a| <= |b|, we can compute the solution without division;
1041     otherwise, we actually do the work required.
1042    */
1043   if ((cmp = s_mp_cmp(a, b)) <= 0) {
1044     if (cmp) {
1045       /* r was set to a above. */
1046       mp_zero(pQ);
1047     } else {
1048       mp_set(pQ, 1);
1049       mp_zero(pR);
1050     }
1051   } else {
1052     MP_CHECKOK( mp_init_copy(&btmp, b) );
1053     MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
1054   }
1055
1056   /* Compute the signs for the output  */
1057   MP_SIGN(pR) = signA;   /* Sr = Sa              */
1058   /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1059   MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1060
1061   if(s_mp_cmp_d(pQ, 0) == MP_EQ)
1062     SIGN(pQ) = ZPOS;
1063   if(s_mp_cmp_d(pR, 0) == MP_EQ)
1064     SIGN(pR) = ZPOS;
1065
1066   /* Copy output, if it is needed      */
1067   if(q && q != pQ)
1068     s_mp_exch(pQ, q);
1069
1070   if(r && r != pR)
1071     s_mp_exch(pR, r);
1072
1073 CLEANUP:
1074   mp_clear(&btmp);
1075   mp_clear(&rtmp);
1076   mp_clear(&qtmp);
1077
1078   return res;
1079
1080 } /* end mp_div() */
1081
1082 /* }}} */
1083
1084 /* {{{ mp_div_2d(a, d, q, r) */
1085
1086 mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1087 {
1088   mp_err  res;
1089
1090   ARGCHK(a != NULL, MP_BADARG);
1091
1092   if(q) {
1093     if((res = mp_copy(a, q)) != MP_OKAY)
1094       return res;
1095   }
1096   if(r) {
1097     if((res = mp_copy(a, r)) != MP_OKAY)
1098       return res;
1099   }
1100   if(q) {
1101     s_mp_div_2d(q, d);
1102   }
1103   if(r) {
1104     s_mp_mod_2d(r, d);
1105   }
1106
1107   return MP_OKAY;
1108
1109 } /* end mp_div_2d() */
1110
1111 /* }}} */
1112
1113 /* {{{ mp_expt(a, b, c) */
1114
1115 /*
1116   mp_expt(a, b, c)
1117
1118   Compute c = a ** b, that is, raise a to the b power.  Uses a
1119   standard iterative square-and-multiply technique.
1120  */
1121
1122 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
1123 {
1124   mp_int   s, x;
1125   mp_err   res;
1126   mp_digit d;
1127   int      dig, bit;
1128
1129   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1130
1131   if(mp_cmp_z(b) < 0)
1132     return MP_RANGE;
1133
1134   if((res = mp_init(&s)) != MP_OKAY)
1135     return res;
1136
1137   mp_set(&s, 1);
1138
1139   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1140     goto X;
1141
1142   /* Loop over low-order digits in ascending order */
1143   for(dig = 0; dig < (USED(b) - 1); dig++) {
1144     d = DIGIT(b, dig);
1145
1146     /* Loop over bits of each non-maximal digit */
1147     for(bit = 0; bit < DIGIT_BIT; bit++) {
1148       if(d & 1) {
1149         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1150           goto CLEANUP;
1151       }
1152
1153       d >>= 1;
1154
1155       if((res = s_mp_sqr(&x)) != MP_OKAY)
1156         goto CLEANUP;
1157     }
1158   }
1159
1160   /* Consider now the last digit... */
1161   d = DIGIT(b, dig);
1162
1163   while(d) {
1164     if(d & 1) {
1165       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1166         goto CLEANUP;
1167     }
1168
1169     d >>= 1;
1170
1171     if((res = s_mp_sqr(&x)) != MP_OKAY)
1172       goto CLEANUP;
1173   }
1174
1175   if(mp_iseven(b))
1176     SIGN(&s) = SIGN(a);
1177
1178   res = mp_copy(&s, c);
1179
1180 CLEANUP:
1181   mp_clear(&x);
1182 X:
1183   mp_clear(&s);
1184
1185   return res;
1186
1187 } /* end mp_expt() */
1188
1189 /* }}} */
1190
1191 /* {{{ mp_2expt(a, k) */
1192
1193 /* Compute a = 2^k */
1194
1195 mp_err mp_2expt(mp_int *a, mp_digit k)
1196 {
1197   ARGCHK(a != NULL, MP_BADARG);
1198
1199   return s_mp_2expt(a, k);
1200
1201 } /* end mp_2expt() */
1202
1203 /* }}} */
1204
1205 /* {{{ mp_mod(a, m, c) */
1206
1207 /*
1208   mp_mod(a, m, c)
1209
1210   Compute c = a (mod m).  Result will always be 0 <= c < m.
1211  */
1212
1213 mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1214 {
1215   mp_err  res;
1216   int     mag;
1217
1218   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1219
1220   if(SIGN(m) == NEG)
1221     return MP_RANGE;
1222
1223   /*
1224      If |a| > m, we need to divide to get the remainder and take the
1225      absolute value.
1226
1227      If |a| < m, we don't need to do any division, just copy and adjust
1228      the sign (if a is negative).
1229
1230      If |a| == m, we can simply set the result to zero.
1231
1232      This order is intended to minimize the average path length of the
1233      comparison chain on common workloads -- the most frequent cases are
1234      that |a| != m, so we do those first.
1235    */
1236   if((mag = s_mp_cmp(a, m)) > 0) {
1237     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1238       return res;
1239
1240     if(SIGN(c) == NEG) {
1241       if((res = mp_add(c, m, c)) != MP_OKAY)
1242         return res;
1243     }
1244
1245   } else if(mag < 0) {
1246     if((res = mp_copy(a, c)) != MP_OKAY)
1247       return res;
1248
1249     if(mp_cmp_z(a) < 0) {
1250       if((res = mp_add(c, m, c)) != MP_OKAY)
1251         return res;
1252
1253     }
1254
1255   } else {
1256     mp_zero(c);
1257
1258   }
1259
1260   return MP_OKAY;
1261
1262 } /* end mp_mod() */
1263
1264 /* }}} */
1265
1266 /* {{{ mp_mod_d(a, d, c) */
1267
1268 /*
1269   mp_mod_d(a, d, c)
1270
1271   Compute c = a (mod d).  Result will always be 0 <= c < d
1272  */
1273 mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1274 {
1275   mp_err   res;
1276   mp_digit rem;
1277
1278   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1279
1280   if(s_mp_cmp_d(a, d) > 0) {
1281     if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1282       return res;
1283
1284   } else {
1285     if(SIGN(a) == NEG)
1286       rem = d - DIGIT(a, 0);
1287     else
1288       rem = DIGIT(a, 0);
1289   }
1290
1291   if(c)
1292     *c = rem;
1293
1294   return MP_OKAY;
1295
1296 } /* end mp_mod_d() */
1297
1298 /* }}} */
1299
1300 /* {{{ mp_sqrt(a, b) */
1301
1302 /*
1303   mp_sqrt(a, b)
1304
1305   Compute the integer square root of a, and store the result in b.
1306   Uses an integer-arithmetic version of Newton's iterative linear
1307   approximation technique to determine this value; the result has the
1308   following two properties:
1309
1310      b^2 <= a
1311      (b+1)^2 >= a
1312
1313   It is a range error to pass a negative value.
1314  */
1315 mp_err mp_sqrt(const mp_int *a, mp_int *b)
1316 {
1317   mp_int   x, t;
1318   mp_err   res;
1319   mp_size  used;
1320
1321   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1322
1323   /* Cannot take square root of a negative value */
1324   if(SIGN(a) == NEG)
1325     return MP_RANGE;
1326
1327   /* Special cases for zero and one, trivial     */
1328   if(mp_cmp_d(a, 1) <= 0)
1329     return mp_copy(a, b);
1330
1331   /* Initialize the temporaries we'll use below  */
1332   if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
1333     return res;
1334
1335   /* Compute an initial guess for the iteration as a itself */
1336   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1337     goto X;
1338
1339   used = MP_USED(&x);
1340   if (used > 1) {
1341     s_mp_rshd(&x, used / 2);
1342   }
1343
1344   for(;;) {
1345     /* t = (x * x) - a */
1346     mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
1347     if((res = mp_sqr(&t, &t)) != MP_OKAY ||
1348        (res = mp_sub(&t, a, &t)) != MP_OKAY)
1349       goto CLEANUP;
1350
1351     /* t = t / 2x       */
1352     s_mp_mul_2(&x);
1353     if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
1354       goto CLEANUP;
1355     s_mp_div_2(&x);
1356
1357     /* Terminate the loop, if the quotient is zero */
1358     if(mp_cmp_z(&t) == MP_EQ)
1359       break;
1360
1361     /* x = x - t       */
1362     if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
1363       goto CLEANUP;
1364
1365   }
1366
1367   /* Copy result to output parameter */
1368   mp_sub_d(&x, 1, &x);
1369   s_mp_exch(&x, b);
1370
1371  CLEANUP:
1372   mp_clear(&x);
1373  X:
1374   mp_clear(&t);
1375
1376   return res;
1377
1378 } /* end mp_sqrt() */
1379
1380 /* }}} */
1381
1382 /* }}} */
1383
1384 /*------------------------------------------------------------------------*/
1385 /* {{{ Modular arithmetic */
1386
1387 #if MP_MODARITH
1388 /* {{{ mp_addmod(a, b, m, c) */
1389
1390 /*
1391   mp_addmod(a, b, m, c)
1392
1393   Compute c = (a + b) mod m
1394  */
1395
1396 mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1397 {
1398   mp_err  res;
1399
1400   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1401
1402   if((res = mp_add(a, b, c)) != MP_OKAY)
1403     return res;
1404   if((res = mp_mod(c, m, c)) != MP_OKAY)
1405     return res;
1406
1407   return MP_OKAY;
1408
1409 }
1410
1411 /* }}} */
1412
1413 /* {{{ mp_submod(a, b, m, c) */
1414
1415 /*
1416   mp_submod(a, b, m, c)
1417
1418   Compute c = (a - b) mod m
1419  */
1420
1421 mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1422 {
1423   mp_err  res;
1424
1425   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1426
1427   if((res = mp_sub(a, b, c)) != MP_OKAY)
1428     return res;
1429   if((res = mp_mod(c, m, c)) != MP_OKAY)
1430     return res;
1431
1432   return MP_OKAY;
1433
1434 }
1435
1436 /* }}} */
1437
1438 /* {{{ mp_mulmod(a, b, m, c) */
1439
1440 /*
1441   mp_mulmod(a, b, m, c)
1442
1443   Compute c = (a * b) mod m
1444  */
1445
1446 mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1447 {
1448   mp_err  res;
1449
1450   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1451
1452   if((res = mp_mul(a, b, c)) != MP_OKAY)
1453     return res;
1454   if((res = mp_mod(c, m, c)) != MP_OKAY)
1455     return res;
1456
1457   return MP_OKAY;
1458
1459 }
1460
1461 /* }}} */
1462
1463 /* {{{ mp_sqrmod(a, m, c) */
1464
1465 #if MP_SQUARE
1466 mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1467 {
1468   mp_err  res;
1469
1470   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1471
1472   if((res = mp_sqr(a, c)) != MP_OKAY)
1473     return res;
1474   if((res = mp_mod(c, m, c)) != MP_OKAY)
1475     return res;
1476
1477   return MP_OKAY;
1478
1479 } /* end mp_sqrmod() */
1480 #endif
1481
1482 /* }}} */
1483
1484 /* {{{ s_mp_exptmod(a, b, m, c) */
1485
1486 /*
1487   s_mp_exptmod(a, b, m, c)
1488
1489   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1490   method with modular reductions at each step. (This is basically the
1491   same code as mp_expt(), except for the addition of the reductions)
1492
1493   The modular reductions are done using Barrett's algorithm (see
1494   s_mp_reduce() below for details)
1495  */
1496
1497 mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1498 {
1499   mp_int   s, x, mu;
1500   mp_err   res;
1501   mp_digit d;
1502   int      dig, bit;
1503
1504   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1505
1506   if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1507     return MP_RANGE;
1508
1509   if((res = mp_init(&s)) != MP_OKAY)
1510     return res;
1511   if((res = mp_init_copy(&x, a)) != MP_OKAY ||
1512      (res = mp_mod(&x, m, &x)) != MP_OKAY)
1513     goto X;
1514   if((res = mp_init(&mu)) != MP_OKAY)
1515     goto MU;
1516
1517   mp_set(&s, 1);
1518
1519   /* mu = b^2k / m */
1520   s_mp_add_d(&mu, 1);
1521   s_mp_lshd(&mu, 2 * USED(m));
1522   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1523     goto CLEANUP;
1524
1525   /* Loop over digits of b in ascending order, except highest order */
1526   for(dig = 0; dig < (USED(b) - 1); dig++) {
1527     d = DIGIT(b, dig);
1528
1529     /* Loop over the bits of the lower-order digits */
1530     for(bit = 0; bit < DIGIT_BIT; bit++) {
1531       if(d & 1) {
1532         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1533           goto CLEANUP;
1534         if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1535           goto CLEANUP;
1536       }
1537
1538       d >>= 1;
1539
1540       if((res = s_mp_sqr(&x)) != MP_OKAY)
1541         goto CLEANUP;
1542       if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1543         goto CLEANUP;
1544     }
1545   }
1546
1547   /* Now do the last digit... */
1548   d = DIGIT(b, dig);
1549
1550   while(d) {
1551     if(d & 1) {
1552       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1553         goto CLEANUP;
1554       if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1555         goto CLEANUP;
1556     }
1557
1558     d >>= 1;
1559
1560     if((res = s_mp_sqr(&x)) != MP_OKAY)
1561       goto CLEANUP;
1562     if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1563       goto CLEANUP;
1564   }
1565
1566   s_mp_exch(&s, c);
1567
1568  CLEANUP:
1569   mp_clear(&mu);
1570  MU:
1571   mp_clear(&x);
1572  X:
1573   mp_clear(&s);
1574
1575   return res;
1576
1577 } /* end s_mp_exptmod() */
1578
1579 /* }}} */
1580
1581 /* {{{ mp_exptmod_d(a, d, m, c) */
1582
1583 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1584 {
1585   mp_int   s, x;
1586   mp_err   res;
1587
1588   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1589
1590   if((res = mp_init(&s)) != MP_OKAY)
1591     return res;
1592   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1593     goto X;
1594
1595   mp_set(&s, 1);
1596
1597   while(d != 0) {
1598     if(d & 1) {
1599       if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1600          (res = mp_mod(&s, m, &s)) != MP_OKAY)
1601         goto CLEANUP;
1602     }
1603
1604     d /= 2;
1605
1606     if((res = s_mp_sqr(&x)) != MP_OKAY ||
1607        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1608       goto CLEANUP;
1609   }
1610
1611   s_mp_exch(&s, c);
1612
1613 CLEANUP:
1614   mp_clear(&x);
1615 X:
1616   mp_clear(&s);
1617
1618   return res;
1619
1620 } /* end mp_exptmod_d() */
1621
1622 /* }}} */
1623 #endif /* if MP_MODARITH */
1624
1625 /* }}} */
1626
1627 /*------------------------------------------------------------------------*/
1628 /* {{{ Comparison functions */
1629
1630 /* {{{ mp_cmp_z(a) */
1631
1632 /*
1633   mp_cmp_z(a)
1634
1635   Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1636  */
1637
1638 int    mp_cmp_z(const mp_int *a)
1639 {
1640   if(SIGN(a) == NEG)
1641     return MP_LT;
1642   else if(USED(a) == 1 && DIGIT(a, 0) == 0)
1643     return MP_EQ;
1644   else
1645     return MP_GT;
1646
1647 } /* end mp_cmp_z() */
1648
1649 /* }}} */
1650
1651 /* {{{ mp_cmp_d(a, d) */
1652
1653 /*
1654   mp_cmp_d(a, d)
1655
1656   Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1657  */
1658
1659 int    mp_cmp_d(const mp_int *a, mp_digit d)
1660 {
1661   ARGCHK(a != NULL, MP_EQ);
1662
1663   if(SIGN(a) == NEG)
1664     return MP_LT;
1665
1666   return s_mp_cmp_d(a, d);
1667
1668 } /* end mp_cmp_d() */
1669
1670 /* }}} */
1671
1672 /* {{{ mp_cmp(a, b) */
1673
1674 int    mp_cmp(const mp_int *a, const mp_int *b)
1675 {
1676   ARGCHK(a != NULL && b != NULL, MP_EQ);
1677
1678   if(SIGN(a) == SIGN(b)) {
1679     int  mag;
1680
1681     if((mag = s_mp_cmp(a, b)) == MP_EQ)
1682       return MP_EQ;
1683
1684     if(SIGN(a) == ZPOS)
1685       return mag;
1686     else
1687       return -mag;
1688
1689   } else if(SIGN(a) == ZPOS) {
1690     return MP_GT;
1691   } else {
1692     return MP_LT;
1693   }
1694
1695 } /* end mp_cmp() */
1696
1697 /* }}} */
1698
1699 /* {{{ mp_cmp_mag(a, b) */
1700
1701 /*
1702   mp_cmp_mag(a, b)
1703
1704   Compares |a| <=> |b|, and returns an appropriate comparison result
1705  */
1706
1707 int    mp_cmp_mag(mp_int *a, mp_int *b)
1708 {
1709   ARGCHK(a != NULL && b != NULL, MP_EQ);
1710
1711   return s_mp_cmp(a, b);
1712
1713 } /* end mp_cmp_mag() */
1714
1715 /* }}} */
1716
1717 /* {{{ mp_cmp_int(a, z) */
1718
1719 /*
1720   This just converts z to an mp_int, and uses the existing comparison
1721   routines.  This is sort of inefficient, but it's not clear to me how
1722   frequently this wil get used anyway.  For small positive constants,
1723   you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
1724  */
1725 int    mp_cmp_int(const mp_int *a, long z)
1726 {
1727   mp_int  tmp;
1728   int     out;
1729
1730   ARGCHK(a != NULL, MP_EQ);
1731
1732   mp_init(&tmp); mp_set_int(&tmp, z);
1733   out = mp_cmp(a, &tmp);
1734   mp_clear(&tmp);
1735
1736   return out;
1737
1738 } /* end mp_cmp_int() */
1739
1740 /* }}} */
1741
1742 /* {{{ mp_isodd(a) */
1743
1744 /*
1745   mp_isodd(a)
1746
1747   Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1748  */
1749 int    mp_isodd(const mp_int *a)
1750 {
1751   ARGCHK(a != NULL, 0);
1752
1753   return (int)(DIGIT(a, 0) & 1);
1754
1755 } /* end mp_isodd() */
1756
1757 /* }}} */
1758
1759 /* {{{ mp_iseven(a) */
1760
1761 int    mp_iseven(const mp_int *a)
1762 {
1763   return !mp_isodd(a);
1764
1765 } /* end mp_iseven() */
1766
1767 /* }}} */
1768
1769 /* }}} */
1770
1771 /*------------------------------------------------------------------------*/
1772 /* {{{ Number theoretic functions */
1773
1774 #if MP_NUMTH
1775 /* {{{ mp_gcd(a, b, c) */
1776
1777 /*
1778   Like the old mp_gcd() function, except computes the GCD using the
1779   binary algorithm due to Josef Stein in 1961 (via Knuth).
1780  */
1781 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1782 {
1783   mp_err   res;
1784   mp_int   u, v, t;
1785   mp_size  k = 0;
1786
1787   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1788
1789   if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
1790       return MP_RANGE;
1791   if(mp_cmp_z(a) == MP_EQ) {
1792     return mp_copy(b, c);
1793   } else if(mp_cmp_z(b) == MP_EQ) {
1794     return mp_copy(a, c);
1795   }
1796
1797   if((res = mp_init(&t)) != MP_OKAY)
1798     return res;
1799   if((res = mp_init_copy(&u, a)) != MP_OKAY)
1800     goto U;
1801   if((res = mp_init_copy(&v, b)) != MP_OKAY)
1802     goto V;
1803
1804   SIGN(&u) = ZPOS;
1805   SIGN(&v) = ZPOS;
1806
1807   /* Divide out common factors of 2 until at least 1 of a, b is even */
1808   while(mp_iseven(&u) && mp_iseven(&v)) {
1809     s_mp_div_2(&u);
1810     s_mp_div_2(&v);
1811     ++k;
1812   }
1813
1814   /* Initialize t */
1815   if(mp_isodd(&u)) {
1816     if((res = mp_copy(&v, &t)) != MP_OKAY)
1817       goto CLEANUP;
1818
1819     /* t = -v */
1820     if(SIGN(&v) == ZPOS)
1821       SIGN(&t) = NEG;
1822     else
1823       SIGN(&t) = ZPOS;
1824
1825   } else {
1826     if((res = mp_copy(&u, &t)) != MP_OKAY)
1827       goto CLEANUP;
1828
1829   }
1830
1831   for(;;) {
1832     while(mp_iseven(&t)) {
1833       s_mp_div_2(&t);
1834     }
1835
1836     if(mp_cmp_z(&t) == MP_GT) {
1837       if((res = mp_copy(&t, &u)) != MP_OKAY)
1838         goto CLEANUP;
1839
1840     } else {
1841       if((res = mp_copy(&t, &v)) != MP_OKAY)
1842         goto CLEANUP;
1843
1844       /* v = -t */
1845       if(SIGN(&t) == ZPOS)
1846         SIGN(&v) = NEG;
1847       else
1848         SIGN(&v) = ZPOS;
1849     }
1850
1851     if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
1852       goto CLEANUP;
1853
1854     if(s_mp_cmp_d(&t, 0) == MP_EQ)
1855       break;
1856   }
1857
1858   s_mp_2expt(&v, k);       /* v = 2^k   */
1859   res = mp_mul(&u, &v, c); /* c = u * v */
1860
1861  CLEANUP:
1862   mp_clear(&v);
1863  V:
1864   mp_clear(&u);
1865  U:
1866   mp_clear(&t);
1867
1868   return res;
1869
1870 } /* end mp_gcd() */
1871
1872 /* }}} */
1873
1874 /* {{{ mp_lcm(a, b, c) */
1875
1876 /* We compute the least common multiple using the rule:
1877
1878    ab = [a, b](a, b)
1879
1880    ... by computing the product, and dividing out the gcd.
1881  */
1882
1883 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
1884 {
1885   mp_int  gcd, prod;
1886   mp_err  res;
1887
1888   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1889
1890   /* Set up temporaries */
1891   if((res = mp_init(&gcd)) != MP_OKAY)
1892     return res;
1893   if((res = mp_init(&prod)) != MP_OKAY)
1894     goto GCD;
1895
1896   if((res = mp_mul(a, b, &prod)) != MP_OKAY)
1897     goto CLEANUP;
1898   if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
1899     goto CLEANUP;
1900
1901   res = mp_div(&prod, &gcd, c, NULL);
1902
1903  CLEANUP:
1904   mp_clear(&prod);
1905  GCD:
1906   mp_clear(&gcd);
1907
1908   return res;
1909
1910 } /* end mp_lcm() */
1911
1912 /* }}} */
1913
1914 /* {{{ mp_xgcd(a, b, g, x, y) */
1915
1916 /*
1917   mp_xgcd(a, b, g, x, y)
1918
1919   Compute g = (a, b) and values x and y satisfying Bezout's identity
1920   (that is, ax + by = g).  This uses the binary extended GCD algorithm
1921   based on the Stein algorithm used for mp_gcd()
1922   See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
1923  */
1924
1925 mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
1926 {
1927   mp_int   gx, xc, yc, u, v, A, B, C, D;
1928   mp_int  *clean[9];
1929   mp_err   res;
1930   int      last = -1;
1931
1932   if(mp_cmp_z(b) == 0)
1933     return MP_RANGE;
1934
1935   /* Initialize all these variables we need */
1936   MP_CHECKOK( mp_init(&u) );
1937   clean[++last] = &u;
1938   MP_CHECKOK( mp_init(&v) );
1939   clean[++last] = &v;
1940   MP_CHECKOK( mp_init(&gx) );
1941   clean[++last] = &gx;
1942   MP_CHECKOK( mp_init(&A) );
1943   clean[++last] = &A;
1944   MP_CHECKOK( mp_init(&B) );
1945   clean[++last] = &B;
1946   MP_CHECKOK( mp_init(&C) );
1947   clean[++last] = &C;
1948   MP_CHECKOK( mp_init(&D) );
1949   clean[++last] = &D;
1950   MP_CHECKOK( mp_init_copy(&xc, a) );
1951   clean[++last] = &xc;
1952   mp_abs(&xc, &xc);
1953   MP_CHECKOK( mp_init_copy(&yc, b) );
1954   clean[++last] = &yc;
1955   mp_abs(&yc, &yc);
1956
1957   mp_set(&gx, 1);
1958
1959   /* Divide by two until at least one of them is odd */
1960   while(mp_iseven(&xc) && mp_iseven(&yc)) {
1961     mp_size nx = mp_trailing_zeros(&xc);
1962     mp_size ny = mp_trailing_zeros(&yc);
1963     mp_size n  = MP_MIN(nx, ny);
1964     s_mp_div_2d(&xc,n);
1965     s_mp_div_2d(&yc,n);
1966     MP_CHECKOK( s_mp_mul_2d(&gx,n) );
1967   }
1968
1969   mp_copy(&xc, &u);
1970   mp_copy(&yc, &v);
1971   mp_set(&A, 1); mp_set(&D, 1);
1972
1973   /* Loop through binary GCD algorithm */
1974   do {
1975     while(mp_iseven(&u)) {
1976       s_mp_div_2(&u);
1977
1978       if(mp_iseven(&A) && mp_iseven(&B)) {
1979         s_mp_div_2(&A); s_mp_div_2(&B);
1980       } else {
1981         MP_CHECKOK( mp_add(&A, &yc, &A) );
1982         s_mp_div_2(&A);
1983         MP_CHECKOK( mp_sub(&B, &xc, &B) );
1984         s_mp_div_2(&B);
1985       }
1986     }
1987
1988     while(mp_iseven(&v)) {
1989       s_mp_div_2(&v);
1990
1991       if(mp_iseven(&C) && mp_iseven(&D)) {
1992         s_mp_div_2(&C); s_mp_div_2(&D);
1993       } else {
1994         MP_CHECKOK( mp_add(&C, &yc, &C) );
1995         s_mp_div_2(&C);
1996         MP_CHECKOK( mp_sub(&D, &xc, &D) );
1997         s_mp_div_2(&D);
1998       }
1999     }
2000
2001     if(mp_cmp(&u, &v) >= 0) {
2002       MP_CHECKOK( mp_sub(&u, &v, &u) );
2003       MP_CHECKOK( mp_sub(&A, &C, &A) );
2004       MP_CHECKOK( mp_sub(&B, &D, &B) );
2005     } else {
2006       MP_CHECKOK( mp_sub(&v, &u, &v) );
2007       MP_CHECKOK( mp_sub(&C, &A, &C) );
2008       MP_CHECKOK( mp_sub(&D, &B, &D) );
2009     }
2010   } while (mp_cmp_z(&u) != 0);
2011
2012   /* copy results to output */
2013   if(x)
2014     MP_CHECKOK( mp_copy(&C, x) );
2015
2016   if(y)
2017     MP_CHECKOK( mp_copy(&D, y) );
2018
2019   if(g)
2020     MP_CHECKOK( mp_mul(&gx, &v, g) );
2021
2022  CLEANUP:
2023   while(last >= 0)
2024     mp_clear(clean[last--]);
2025
2026   return res;
2027
2028 } /* end mp_xgcd() */
2029
2030 /* }}} */
2031
2032 mp_size mp_trailing_zeros(const mp_int *mp)
2033 {
2034   mp_digit d;
2035   mp_size  n = 0;
2036   int      ix;
2037
2038   if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2039     return n;
2040
2041   for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
2042     n += MP_DIGIT_BIT;
2043   if (!d)
2044     return 0;   /* shouldn't happen, but ... */
2045 #if !defined(MP_USE_UINT_DIGIT)
2046   if (!(d & 0xffffffffU)) {
2047     d >>= 32;
2048     n  += 32;
2049   }
2050 #endif
2051   if (!(d & 0xffffU)) {
2052     d >>= 16;
2053     n  += 16;
2054   }
2055   if (!(d & 0xffU)) {
2056     d >>= 8;
2057     n  += 8;
2058   }
2059   if (!(d & 0xfU)) {
2060     d >>= 4;
2061     n  += 4;
2062   }
2063   if (!(d & 0x3U)) {
2064     d >>= 2;
2065     n  += 2;
2066   }
2067   if (!(d & 0x1U)) {
2068     d >>= 1;
2069     n  += 1;
2070   }
2071 #if MP_ARGCHK == 2
2072   assert(0 != (d & 1));
2073 #endif
2074   return n;
2075 }
2076
2077 /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2078 ** Returns k (positive) or error (negative).
2079 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2080 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2081 */
2082 mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2083 {
2084   mp_err res;
2085   mp_err k    = 0;
2086   mp_int d, f, g;
2087
2088   ARGCHK(a && p && c, MP_BADARG);
2089
2090   MP_DIGITS(&d) = 0;
2091   MP_DIGITS(&f) = 0;
2092   MP_DIGITS(&g) = 0;
2093   MP_CHECKOK( mp_init(&d) );
2094   MP_CHECKOK( mp_init_copy(&f, a) );    /* f = a */
2095   MP_CHECKOK( mp_init_copy(&g, p) );    /* g = p */
2096
2097   mp_set(c, 1);
2098   mp_zero(&d);
2099
2100   if (mp_cmp_z(&f) == 0) {
2101     res = MP_UNDEF;
2102   } else
2103   for (;;) {
2104     int diff_sign;
2105     while (mp_iseven(&f)) {
2106       mp_size n = mp_trailing_zeros(&f);
2107       if (!n) {
2108         res = MP_UNDEF;
2109         goto CLEANUP;
2110       }
2111       s_mp_div_2d(&f, n);
2112       MP_CHECKOK( s_mp_mul_2d(&d, n) );
2113       k += n;
2114     }
2115     if (mp_cmp_d(&f, 1) == MP_EQ) {     /* f == 1 */
2116       res = k;
2117       break;
2118     }
2119     diff_sign = mp_cmp(&f, &g);
2120     if (diff_sign < 0) {                /* f < g */
2121       s_mp_exch(&f, &g);
2122       s_mp_exch(c, &d);
2123     } else if (diff_sign == 0) {                /* f == g */
2124       res = MP_UNDEF;           /* a and p are not relatively prime */
2125       break;
2126     }
2127     if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
2128       MP_CHECKOK( mp_sub(&f, &g, &f) ); /* f = f - g */
2129       MP_CHECKOK( mp_sub(c,  &d,  c) ); /* c = c - d */
2130     } else {
2131       MP_CHECKOK( mp_add(&f, &g, &f) ); /* f = f + g */
2132       MP_CHECKOK( mp_add(c,  &d,  c) ); /* c = c + d */
2133     }
2134   }
2135   if (res >= 0) {
2136     while (MP_SIGN(c) != MP_ZPOS) {
2137       MP_CHECKOK( mp_add(c, p, c) );
2138     }
2139     res = k;
2140   }
2141
2142 CLEANUP:
2143   mp_clear(&d);
2144   mp_clear(&f);
2145   mp_clear(&g);
2146   return res;
2147 }
2148
2149 /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2150 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2151 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2152 */
2153 mp_digit  s_mp_invmod_radix(mp_digit P)
2154 {
2155   mp_digit T = P;
2156   T *= 2 - (P * T);
2157   T *= 2 - (P * T);
2158   T *= 2 - (P * T);
2159   T *= 2 - (P * T);
2160 #if !defined(MP_USE_UINT_DIGIT)
2161   T *= 2 - (P * T);
2162   T *= 2 - (P * T);
2163 #endif
2164   return T;
2165 }
2166
2167 /* Given c, k, and prime p, where a*c == 2**k (mod p),
2168 ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2169 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2170 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2171 */
2172 mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2173 {
2174   int      k_orig = k;
2175   mp_digit r;
2176   mp_size  ix;
2177   mp_err   res;
2178
2179   if (mp_cmp_z(c) < 0) {                /* c < 0 */
2180     MP_CHECKOK( mp_add(c, p, x) );      /* x = c + p */
2181   } else {
2182     MP_CHECKOK( mp_copy(c, x) );        /* x = c */
2183   }
2184
2185   /* make sure x is large enough */
2186   ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2187   ix = MP_MAX(ix, MP_USED(x));
2188   MP_CHECKOK( s_mp_pad(x, ix) );
2189
2190   r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
2191
2192   for (ix = 0; k > 0; ix++) {
2193     int      j = MP_MIN(k, MP_DIGIT_BIT);
2194     mp_digit v = r * MP_DIGIT(x, ix);
2195     if (j < MP_DIGIT_BIT) {
2196       v &= ((mp_digit)1 << j) - 1;      /* v = v mod (2 ** j) */
2197     }
2198     s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2199     k -= j;
2200   }
2201   s_mp_clamp(x);
2202   s_mp_div_2d(x, k_orig);
2203   res = MP_OKAY;
2204
2205 CLEANUP:
2206   return res;
2207 }
2208
2209 /* compute mod inverse using Schroeppel's method, only if m is odd */
2210 mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2211 {
2212   int k;
2213   mp_err  res;
2214   mp_int  x;
2215
2216   ARGCHK(a && m && c, MP_BADARG);
2217
2218   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2219     return MP_RANGE;
2220   if (mp_iseven(m))
2221     return MP_UNDEF;
2222
2223   MP_DIGITS(&x) = 0;
2224
2225   if (a == c) {
2226     if ((res = mp_init_copy(&x, a)) != MP_OKAY)
2227       return res;
2228     if (a == m)
2229       m = &x;
2230     a = &x;
2231   } else if (m == c) {
2232     if ((res = mp_init_copy(&x, m)) != MP_OKAY)
2233       return res;
2234     m = &x;
2235   } else {
2236     MP_DIGITS(&x) = 0;
2237   }
2238
2239   MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
2240   k = res;
2241   MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
2242 CLEANUP:
2243   mp_clear(&x);
2244   return res;
2245 }
2246
2247 /* Known good algorithm for computing modular inverse.  But slow. */
2248 mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2249 {
2250   mp_int  g, x;
2251   mp_err  res;
2252
2253   ARGCHK(a && m && c, MP_BADARG);
2254
2255   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2256     return MP_RANGE;
2257
2258   MP_DIGITS(&g) = 0;
2259   MP_DIGITS(&x) = 0;
2260   MP_CHECKOK( mp_init(&x) );
2261   MP_CHECKOK( mp_init(&g) );
2262
2263   MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
2264
2265   if (mp_cmp_d(&g, 1) != MP_EQ) {
2266     res = MP_UNDEF;
2267     goto CLEANUP;
2268   }
2269
2270   res = mp_mod(&x, m, c);
2271   SIGN(c) = SIGN(a);
2272
2273 CLEANUP:
2274   mp_clear(&x);
2275   mp_clear(&g);
2276
2277   return res;
2278 }
2279
2280 /* modular inverse where modulus is 2**k. */
2281 /* c = a**-1 mod 2**k */
2282 mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2283 {
2284   mp_err res;
2285   mp_size ix = k + 4;
2286   mp_int t0, t1, val, tmp, two2k;
2287
2288   static const mp_digit d2 = 2;
2289   static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2290
2291   if (mp_iseven(a))
2292     return MP_UNDEF;
2293   if (k <= MP_DIGIT_BIT) {
2294     mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
2295     if (k < MP_DIGIT_BIT)
2296       i &= ((mp_digit)1 << k) - (mp_digit)1;
2297     mp_set(c, i);
2298     return MP_OKAY;
2299   }
2300   MP_DIGITS(&t0) = 0;
2301   MP_DIGITS(&t1) = 0;
2302   MP_DIGITS(&val) = 0;
2303   MP_DIGITS(&tmp) = 0;
2304   MP_DIGITS(&two2k) = 0;
2305   MP_CHECKOK( mp_init_copy(&val, a) );
2306   s_mp_mod_2d(&val, k);
2307   MP_CHECKOK( mp_init_copy(&t0, &val) );
2308   MP_CHECKOK( mp_init_copy(&t1, &t0)  );
2309   MP_CHECKOK( mp_init(&tmp) );
2310   MP_CHECKOK( mp_init(&two2k) );
2311   MP_CHECKOK( s_mp_2expt(&two2k, k) );
2312   do {
2313     MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
2314     MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
2315     MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
2316     s_mp_mod_2d(&t1, k);
2317     while (MP_SIGN(&t1) != MP_ZPOS) {
2318       MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
2319     }
2320     if (mp_cmp(&t1, &t0) == MP_EQ)
2321       break;
2322     MP_CHECKOK( mp_copy(&t1, &t0) );
2323   } while (--ix > 0);
2324   if (!ix) {
2325     res = MP_UNDEF;
2326   } else {
2327     mp_exch(c, &t1);
2328   }
2329
2330 CLEANUP:
2331   mp_clear(&t0);
2332   mp_clear(&t1);
2333   mp_clear(&val);
2334   mp_clear(&tmp);
2335   mp_clear(&two2k);
2336   return res;
2337 }
2338
2339 mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2340 {
2341   mp_err res;
2342   mp_size k;
2343   mp_int oddFactor, evenFactor; /* factors of the modulus */
2344   mp_int oddPart, evenPart;     /* parts to combine via CRT. */
2345   mp_int C2, tmp1, tmp2;
2346
2347   /*static const mp_digit d1 = 1; */
2348   /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2349
2350   if ((res = s_mp_ispow2(m)) >= 0) {
2351     k = res;
2352     return s_mp_invmod_2d(a, k, c);
2353   }
2354   MP_DIGITS(&oddFactor) = 0;
2355   MP_DIGITS(&evenFactor) = 0;
2356   MP_DIGITS(&oddPart) = 0;
2357   MP_DIGITS(&evenPart) = 0;
2358   MP_DIGITS(&C2)     = 0;
2359   MP_DIGITS(&tmp1)   = 0;
2360   MP_DIGITS(&tmp2)   = 0;
2361
2362   MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
2363   MP_CHECKOK( mp_init(&evenFactor) );
2364   MP_CHECKOK( mp_init(&oddPart) );
2365   MP_CHECKOK( mp_init(&evenPart) );
2366   MP_CHECKOK( mp_init(&C2)     );
2367   MP_CHECKOK( mp_init(&tmp1)   );
2368   MP_CHECKOK( mp_init(&tmp2)   );
2369
2370   k = mp_trailing_zeros(m);
2371   s_mp_div_2d(&oddFactor, k);
2372   MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
2373
2374   /* compute a**-1 mod oddFactor. */
2375   MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
2376   /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2377   MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
2378
2379   /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2380   /* let m1 = oddFactor,  v1 = oddPart,
2381    * let m2 = evenFactor, v2 = evenPart.
2382    */
2383
2384   /* Compute C2 = m1**-1 mod m2. */
2385   MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
2386
2387   /* compute u = (v2 - v1)*C2 mod m2 */
2388   MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
2389   MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
2390   s_mp_mod_2d(&tmp2, k);
2391   while (MP_SIGN(&tmp2) != MP_ZPOS) {
2392     MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
2393   }
2394
2395   /* compute answer = v1 + u*m1 */
2396   MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
2397   MP_CHECKOK( mp_add(&oddPart,  c,          c) );
2398   /* not sure this is necessary, but it's low cost if not. */
2399   MP_CHECKOK( mp_mod(c,         m,          c) );
2400
2401 CLEANUP:
2402   mp_clear(&oddFactor);
2403   mp_clear(&evenFactor);
2404   mp_clear(&oddPart);
2405   mp_clear(&evenPart);
2406   mp_clear(&C2);
2407   mp_clear(&tmp1);
2408   mp_clear(&tmp2);
2409   return res;
2410 }
2411
2412
2413 /* {{{ mp_invmod(a, m, c) */
2414
2415 /*
2416   mp_invmod(a, m, c)
2417
2418   Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2419   This is equivalent to the question of whether (a, m) = 1.  If not,
2420   MP_UNDEF is returned, and there is no inverse.
2421  */
2422
2423 mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2424 {
2425
2426   ARGCHK(a && m && c, MP_BADARG);
2427
2428   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2429     return MP_RANGE;
2430
2431   if (mp_isodd(m)) {
2432     return s_mp_invmod_odd_m(a, m, c);
2433   }
2434   if (mp_iseven(a))
2435     return MP_UNDEF;    /* not invertable */
2436
2437   return s_mp_invmod_even_m(a, m, c);
2438
2439 } /* end mp_invmod() */
2440
2441 /* }}} */
2442 #endif /* if MP_NUMTH */
2443
2444 /* }}} */
2445
2446 /*------------------------------------------------------------------------*/
2447 /* {{{ mp_print(mp, ofp) */
2448
2449 #if MP_IOFUNC
2450 /*
2451   mp_print(mp, ofp)
2452
2453   Print a textual representation of the given mp_int on the output
2454   stream 'ofp'.  Output is generated using the internal radix.
2455  */
2456
2457 void   mp_print(mp_int *mp, FILE *ofp)
2458 {
2459   int   ix;
2460
2461   if(mp == NULL || ofp == NULL)
2462     return;
2463
2464   fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2465
2466   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2467     fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2468   }
2469
2470 } /* end mp_print() */
2471
2472 #endif /* if MP_IOFUNC */
2473
2474 /* }}} */
2475
2476 /*------------------------------------------------------------------------*/
2477 /* {{{ More I/O Functions */
2478
2479 /* {{{ mp_read_raw(mp, str, len) */
2480
2481 /*
2482    mp_read_raw(mp, str, len)
2483
2484    Read in a raw value (base 256) into the given mp_int
2485  */
2486
2487 mp_err  mp_read_raw(mp_int *mp, char *str, int len)
2488 {
2489   int            ix;
2490   mp_err         res;
2491   unsigned char *ustr = (unsigned char *)str;
2492
2493   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2494
2495   mp_zero(mp);
2496
2497   /* Get sign from first byte */
2498   if(ustr[0])
2499     SIGN(mp) = NEG;
2500   else
2501     SIGN(mp) = ZPOS;
2502
2503   /* Read the rest of the digits */
2504   for(ix = 1; ix < len; ix++) {
2505     if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2506       return res;
2507     if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2508       return res;
2509   }
2510
2511   return MP_OKAY;
2512
2513 } /* end mp_read_raw() */
2514
2515 /* }}} */
2516
2517 /* {{{ mp_raw_size(mp) */
2518
2519 int    mp_raw_size(mp_int *mp)
2520 {
2521   ARGCHK(mp != NULL, 0);
2522
2523   return (USED(mp) * sizeof(mp_digit)) + 1;
2524
2525 } /* end mp_raw_size() */
2526
2527 /* }}} */
2528
2529 /* {{{ mp_toraw(mp, str) */
2530
2531 mp_err mp_toraw(mp_int *mp, char *str)
2532 {
2533   int  ix, jx, pos = 1;
2534
2535   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2536
2537   str[0] = (char)SIGN(mp);
2538
2539   /* Iterate over each digit... */
2540   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2541     mp_digit  d = DIGIT(mp, ix);
2542
2543     /* Unpack digit bytes, high order first */
2544     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2545       str[pos++] = (char)(d >> (jx * CHAR_BIT));
2546     }
2547   }
2548
2549   return MP_OKAY;
2550
2551 } /* end mp_toraw() */
2552
2553 /* }}} */
2554
2555 /* {{{ mp_read_radix(mp, str, radix) */
2556
2557 /*
2558   mp_read_radix(mp, str, radix)
2559
2560   Read an integer from the given string, and set mp to the resulting
2561   value.  The input is presumed to be in base 10.  Leading non-digit
2562   characters are ignored, and the function reads until a non-digit
2563   character or the end of the string.
2564  */
2565
2566 mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
2567 {
2568   int     ix = 0, val = 0;
2569   mp_err  res;
2570   mp_sign sig = ZPOS;
2571
2572   ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2573          MP_BADARG);
2574
2575   mp_zero(mp);
2576
2577   /* Skip leading non-digit characters until a digit or '-' or '+' */
2578   while(str[ix] &&
2579         (s_mp_tovalue(str[ix], radix) < 0) &&
2580         str[ix] != '-' &&
2581         str[ix] != '+') {
2582     ++ix;
2583   }
2584
2585   if(str[ix] == '-') {
2586     sig = NEG;
2587     ++ix;
2588   } else if(str[ix] == '+') {
2589     sig = ZPOS; /* this is the default anyway... */
2590     ++ix;
2591   }
2592
2593   while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2594     if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2595       return res;
2596     if((res = s_mp_add_d(mp, val)) != MP_OKAY)
2597       return res;
2598     ++ix;
2599   }
2600
2601   if(s_mp_cmp_d(mp, 0) == MP_EQ)
2602     SIGN(mp) = ZPOS;
2603   else
2604     SIGN(mp) = sig;
2605
2606   return MP_OKAY;
2607
2608 } /* end mp_read_radix() */
2609
2610 mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
2611 {
2612   int     radix = default_radix;
2613   int     cx;
2614   mp_sign sig   = ZPOS;
2615   mp_err  res;
2616
2617   /* Skip leading non-digit characters until a digit or '-' or '+' */
2618   while ((cx = *str) != 0 &&
2619         (s_mp_tovalue(cx, radix) < 0) &&
2620         cx != '-' &&
2621         cx != '+') {
2622     ++str;
2623   }
2624
2625   if (cx == '-') {
2626     sig = NEG;
2627     ++str;
2628   } else if (cx == '+') {
2629     sig = ZPOS; /* this is the default anyway... */
2630     ++str;
2631   }
2632
2633   if (str[0] == '0') {
2634     if ((str[1] | 0x20) == 'x') {
2635       radix = 16;
2636       str += 2;
2637     } else {
2638       radix = 8;
2639       str++;
2640     }
2641   }
2642   res = mp_read_radix(a, str, radix);
2643   if (res == MP_OKAY) {
2644     MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2645   }
2646   return res;
2647 }
2648
2649 /* }}} */
2650
2651 /* {{{ mp_radix_size(mp, radix) */
2652
2653 int    mp_radix_size(mp_int *mp, int radix)
2654 {
2655   int  bits;
2656
2657   if(!mp || radix < 2 || radix > MAX_RADIX)
2658     return 0;
2659
2660   bits = USED(mp) * DIGIT_BIT - 1;
2661
2662   return s_mp_outlen(bits, radix);
2663
2664 } /* end mp_radix_size() */
2665
2666 /* }}} */
2667
2668 /* {{{ mp_toradix(mp, str, radix) */
2669
2670 mp_err mp_toradix(mp_int *mp, char *str, int radix)
2671 {
2672   int  ix, pos = 0;
2673
2674   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2675   ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2676
2677   if(mp_cmp_z(mp) == MP_EQ) {
2678     str[0] = '0';
2679     str[1] = '\0';
2680   } else {
2681     mp_err   res;
2682     mp_int   tmp;
2683     mp_sign  sgn;
2684     mp_digit rem, rdx = (mp_digit)radix;
2685     char     ch;
2686
2687     if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2688       return res;
2689
2690     /* Save sign for later, and take absolute value */
2691     sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
2692
2693     /* Generate output digits in reverse order      */
2694     while(mp_cmp_z(&tmp) != 0) {
2695       if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2696         mp_clear(&tmp);
2697         return res;
2698       }
2699
2700       /* Generate digits, use capital letters */
2701       ch = s_mp_todigit(rem, radix, 0);
2702
2703       str[pos++] = ch;
2704     }
2705
2706     /* Add - sign if original value was negative */
2707     if(sgn == NEG)
2708       str[pos++] = '-';
2709
2710     /* Add trailing NUL to end the string        */
2711     str[pos--] = '\0';
2712
2713     /* Reverse the digits and sign indicator     */
2714     ix = 0;
2715     while(ix < pos) {
2716       char tmp = str[ix];
2717
2718       str[ix] = str[pos];
2719       str[pos] = tmp;
2720       ++ix;
2721       --pos;
2722     }
2723
2724     mp_clear(&tmp);
2725   }
2726
2727   return MP_OKAY;
2728
2729 } /* end mp_toradix() */
2730
2731 /* }}} */
2732
2733 /* {{{ mp_tovalue(ch, r) */
2734
2735 int    mp_tovalue(char ch, int r)
2736 {
2737   return s_mp_tovalue(ch, r);
2738
2739 } /* end mp_tovalue() */
2740
2741 /* }}} */
2742
2743 /* }}} */
2744
2745 /* {{{ mp_strerror(ec) */
2746
2747 /*
2748   mp_strerror(ec)
2749
2750   Return a string describing the meaning of error code 'ec'.  The
2751   string returned is allocated in static memory, so the caller should
2752   not attempt to modify or free the memory associated with this
2753   string.
2754  */
2755 const char  *mp_strerror(mp_err ec)
2756 {
2757   int   aec = (ec < 0) ? -ec : ec;
2758
2759   /* Code values are negative, so the senses of these comparisons
2760      are accurate */
2761   if(ec < MP_LAST_CODE || ec > MP_OKAY) {
2762     return mp_err_string[0];  /* unknown error code */
2763   } else {
2764     return mp_err_string[aec + 1];
2765   }
2766
2767 } /* end mp_strerror() */
2768
2769 /* }}} */
2770
2771 /*========================================================================*/
2772 /*------------------------------------------------------------------------*/
2773 /* Static function definitions (internal use only)                        */
2774
2775 /* {{{ Memory management */
2776
2777 /* {{{ s_mp_grow(mp, min) */
2778
2779 /* Make sure there are at least 'min' digits allocated to mp              */
2780 mp_err   s_mp_grow(mp_int *mp, mp_size min)
2781 {
2782   if(min > ALLOC(mp)) {
2783     mp_digit   *tmp;
2784
2785     /* Set min to next nearest default precision block size */
2786     min = MP_ROUNDUP(min, s_mp_defprec);
2787
2788     if((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
2789       return MP_MEM;
2790
2791     s_mp_copy(DIGITS(mp), tmp, USED(mp));
2792
2793 #if MP_CRYPTO
2794     s_mp_setz(DIGITS(mp), ALLOC(mp));
2795 #endif
2796     s_mp_free(DIGITS(mp));
2797     DIGITS(mp) = tmp;
2798     ALLOC(mp) = min;
2799   }
2800
2801   return MP_OKAY;
2802
2803 } /* end s_mp_grow() */
2804
2805 /* }}} */
2806
2807 /* {{{ s_mp_pad(mp, min) */
2808
2809 /* Make sure the used size of mp is at least 'min', growing if needed     */
2810 mp_err   s_mp_pad(mp_int *mp, mp_size min)
2811 {
2812   if(min > USED(mp)) {
2813     mp_err  res;
2814
2815     /* Make sure there is room to increase precision  */
2816     if (min > ALLOC(mp)) {
2817       if ((res = s_mp_grow(mp, min)) != MP_OKAY)
2818         return res;
2819     } else {
2820       s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
2821     }
2822
2823     /* Increase precision; should already be 0-filled */
2824     USED(mp) = min;
2825   }
2826
2827   return MP_OKAY;
2828
2829 } /* end s_mp_pad() */
2830
2831 /* }}} */
2832
2833 /* {{{ s_mp_setz(dp, count) */
2834
2835 #if MP_MACRO == 0
2836 /* Set 'count' digits pointed to by dp to be zeroes                       */
2837 void s_mp_setz(mp_digit *dp, mp_size count)
2838 {
2839 #if MP_MEMSET == 0
2840   int  ix;
2841
2842   for(ix = 0; ix < count; ix++)
2843     dp[ix] = 0;
2844 #else
2845   memset(dp, 0, count * sizeof(mp_digit));
2846 #endif
2847
2848 } /* end s_mp_setz() */
2849 #endif
2850
2851 /* }}} */
2852
2853 /* {{{ s_mp_copy(sp, dp, count) */
2854
2855 #if MP_MACRO == 0
2856 /* Copy 'count' digits from sp to dp                                      */
2857 void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
2858 {
2859 #if MP_MEMCPY == 0
2860   int  ix;
2861
2862   for(ix = 0; ix < count; ix++)
2863     dp[ix] = sp[ix];
2864 #else
2865   memcpy(dp, sp, count * sizeof(mp_digit));
2866 #endif
2867
2868 } /* end s_mp_copy() */
2869 #endif
2870
2871 /* }}} */
2872
2873 /* {{{ s_mp_alloc(nb, ni) */
2874
2875 #if MP_MACRO == 0
2876 /* Allocate ni records of nb bytes each, and return a pointer to that     */
2877 void    *s_mp_alloc(size_t nb, size_t ni)
2878 {
2879   ++mp_allocs;
2880   return calloc(nb, ni);
2881
2882 } /* end s_mp_alloc() */
2883 #endif
2884
2885 /* }}} */
2886
2887 /* {{{ s_mp_free(ptr) */
2888
2889 #if MP_MACRO == 0
2890 /* Free the memory pointed to by ptr                                      */
2891 void     s_mp_free(void *ptr)
2892 {
2893   if(ptr) {
2894     ++mp_frees;
2895     free(ptr);
2896   }
2897 } /* end s_mp_free() */
2898 #endif
2899
2900 /* }}} */
2901
2902 /* {{{ s_mp_clamp(mp) */
2903
2904 #if MP_MACRO == 0
2905 /* Remove leading zeroes from the given value                             */
2906 void     s_mp_clamp(mp_int *mp)
2907 {
2908   mp_size used = MP_USED(mp);
2909   while (used > 1 && DIGIT(mp, used - 1) == 0)
2910     --used;
2911   MP_USED(mp) = used;
2912 } /* end s_mp_clamp() */
2913 #endif
2914
2915 /* }}} */
2916
2917 /* {{{ s_mp_exch(a, b) */
2918
2919 /* Exchange the data for a and b; (b, a) = (a, b)                         */
2920 void     s_mp_exch(mp_int *a, mp_int *b)
2921 {
2922   mp_int   tmp;
2923
2924   tmp = *a;
2925   *a = *b;
2926   *b = tmp;
2927
2928 } /* end s_mp_exch() */
2929
2930 /* }}} */
2931
2932 /* }}} */
2933
2934 /* {{{ Arithmetic helpers */
2935
2936 /* {{{ s_mp_lshd(mp, p) */
2937
2938 /*
2939    Shift mp leftward by p digits, growing if needed, and zero-filling
2940    the in-shifted digits at the right end.  This is a convenient
2941    alternative to multiplication by powers of the radix
2942    The value of USED(mp) must already have been set to the value for
2943    the shifted result.
2944  */
2945
2946 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
2947 {
2948   mp_err  res;
2949   mp_size pos;
2950   int     ix;
2951
2952   if(p == 0)
2953     return MP_OKAY;
2954
2955   if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
2956     return MP_OKAY;
2957
2958   if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
2959     return res;
2960
2961   pos = USED(mp) - 1;
2962
2963   /* Shift all the significant figures over as needed */
2964   for(ix = pos - p; ix >= 0; ix--)
2965     DIGIT(mp, ix + p) = DIGIT(mp, ix);
2966
2967   /* Fill the bottom digits with zeroes */
2968   for(ix = 0; ix < p; ix++)
2969     DIGIT(mp, ix) = 0;
2970
2971   return MP_OKAY;
2972
2973 } /* end s_mp_lshd() */
2974
2975 /* }}} */
2976
2977 /* {{{ s_mp_mul_2d(mp, d) */
2978
2979 /*
2980   Multiply the integer by 2^d, where d is a number of bits.  This
2981   amounts to a bitwise shift of the value.
2982  */
2983 mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
2984 {
2985   mp_err   res;
2986   mp_digit dshift, bshift;
2987   mp_digit mask;
2988
2989   ARGCHK(mp != NULL,  MP_BADARG);
2990
2991   dshift = d / MP_DIGIT_BIT;
2992   bshift = d % MP_DIGIT_BIT;
2993   /* bits to be shifted out of the top word */
2994   mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift));
2995   mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
2996
2997   if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
2998     return res;
2999
3000   if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3001     return res;
3002
3003   if (bshift) {
3004     mp_digit *pa = MP_DIGITS(mp);
3005     mp_digit *alim = pa + MP_USED(mp);
3006     mp_digit  prev = 0;
3007
3008     for (pa += dshift; pa < alim; ) {
3009       mp_digit x = *pa;
3010       *pa++ = (x << bshift) | prev;
3011       prev = x >> (DIGIT_BIT - bshift);
3012     }
3013   }
3014
3015   s_mp_clamp(mp);
3016   return MP_OKAY;
3017 } /* end s_mp_mul_2d() */
3018
3019 /* {{{ s_mp_rshd(mp, p) */
3020
3021 /*
3022    Shift mp rightward by p digits.  Maintains the invariant that
3023    digits above the precision are all zero.  Digits shifted off the
3024    end are lost.  Cannot fail.
3025  */
3026
3027 void     s_mp_rshd(mp_int *mp, mp_size p)
3028 {
3029   mp_size  ix;
3030   mp_digit *src, *dst;
3031
3032   if(p == 0)
3033     return;
3034
3035   /* Shortcut when all digits are to be shifted off */
3036   if(p >= USED(mp)) {
3037     s_mp_setz(DIGITS(mp), ALLOC(mp));
3038     USED(mp) = 1;
3039     SIGN(mp) = ZPOS;
3040     return;
3041   }
3042
3043   /* Shift all the significant figures over as needed */
3044   dst = MP_DIGITS(mp);
3045   src = dst + p;
3046   for (ix = USED(mp) - p; ix > 0; ix--)
3047     *dst++ = *src++;
3048
3049   MP_USED(mp) -= p;
3050   /* Fill the top digits with zeroes */
3051   while (p-- > 0)
3052     *dst++ = 0;
3053
3054 #if 0
3055   /* Strip off any leading zeroes    */
3056   s_mp_clamp(mp);
3057 #endif
3058
3059 } /* end s_mp_rshd() */
3060
3061 /* }}} */
3062
3063 /* {{{ s_mp_div_2(mp) */
3064
3065 /* Divide by two -- take advantage of radix properties to do it fast      */
3066 void     s_mp_div_2(mp_int *mp)
3067 {
3068   s_mp_div_2d(mp, 1);
3069
3070 } /* end s_mp_div_2() */
3071
3072 /* }}} */
3073
3074 /* {{{ s_mp_mul_2(mp) */
3075
3076 mp_err s_mp_mul_2(mp_int *mp)
3077 {
3078   mp_digit *pd;
3079   int      ix, used;
3080   mp_digit kin = 0;
3081
3082   /* Shift digits leftward by 1 bit */
3083   used = MP_USED(mp);
3084   pd = MP_DIGITS(mp);
3085   for (ix = 0; ix < used; ix++) {
3086     mp_digit d = *pd;
3087     *pd++ = (d << 1) | kin;
3088     kin = (d >> (DIGIT_BIT - 1));
3089   }
3090
3091   /* Deal with rollover from last digit */
3092   if (kin) {
3093     if (ix >= ALLOC(mp)) {
3094       mp_err res;
3095       if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3096         return res;
3097     }
3098
3099     DIGIT(mp, ix) = kin;
3100     USED(mp) += 1;
3101   }
3102
3103   return MP_OKAY;
3104
3105 } /* end s_mp_mul_2() */
3106
3107 /* }}} */
3108
3109 /* {{{ s_mp_mod_2d(mp, d) */
3110
3111 /*
3112   Remainder the integer by 2^d, where d is a number of bits.  This
3113   amounts to a bitwise AND of the value, and does not require the full
3114   division code
3115  */
3116 void     s_mp_mod_2d(mp_int *mp, mp_digit d)
3117 {
3118   mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3119   mp_size  ix;
3120   mp_digit dmask;
3121
3122   if(ndig >= USED(mp))
3123     return;
3124
3125   /* Flush all the bits above 2^d in its digit */
3126   dmask = ((mp_digit)1 << nbit) - 1;
3127   DIGIT(mp, ndig) &= dmask;
3128
3129   /* Flush all digits above the one with 2^d in it */
3130   for(ix = ndig + 1; ix < USED(mp); ix++)
3131     DIGIT(mp, ix) = 0;
3132
3133   s_mp_clamp(mp);
3134
3135 } /* end s_mp_mod_2d() */
3136
3137 /* }}} */
3138
3139 /* {{{ s_mp_div_2d(mp, d) */
3140
3141 /*
3142   Divide the integer by 2^d, where d is a number of bits.  This
3143   amounts to a bitwise shift of the value, and does not require the
3144   full division code (used in Barrett reduction, see below)
3145  */
3146 void     s_mp_div_2d(mp_int *mp, mp_digit d)
3147 {
3148   int       ix;
3149   mp_digit  save, next, mask;
3150
3151   s_mp_rshd(mp, d / DIGIT_BIT);
3152   d %= DIGIT_BIT;
3153   if (d) {
3154     mask = ((mp_digit)1 << d) - 1;
3155     save = 0;
3156     for(ix = USED(mp) - 1; ix >= 0; ix--) {
3157       next = DIGIT(mp, ix) & mask;
3158       DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
3159       save = next;
3160     }
3161   }
3162   s_mp_clamp(mp);
3163
3164 } /* end s_mp_div_2d() */
3165
3166 /* }}} */
3167
3168 /* {{{ s_mp_norm(a, b, *d) */
3169
3170 /*
3171   s_mp_norm(a, b, *d)
3172
3173   Normalize a and b for division, where b is the divisor.  In order
3174   that we might make good guesses for quotient digits, we want the
3175   leading digit of b to be at least half the radix, which we
3176   accomplish by multiplying a and b by a power of 2.  The exponent
3177   (shift count) is placed in *pd, so that the remainder can be shifted
3178   back at the end of the division process.
3179  */
3180
3181 mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3182 {
3183   mp_digit  d;
3184   mp_digit  mask;
3185   mp_digit  b_msd;
3186   mp_err    res    = MP_OKAY;
3187
3188   d = 0;
3189   mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);        /* mask is msb of digit */
3190   b_msd = DIGIT(b, USED(b) - 1);
3191   while (!(b_msd & mask)) {
3192     b_msd <<= 1;
3193     ++d;
3194   }
3195
3196   if (d) {
3197     MP_CHECKOK( s_mp_mul_2d(a, d) );
3198     MP_CHECKOK( s_mp_mul_2d(b, d) );
3199   }
3200
3201   *pd = d;
3202 CLEANUP:
3203   return res;
3204
3205 } /* end s_mp_norm() */
3206
3207 /* }}} */
3208
3209 /* }}} */
3210
3211 /* {{{ Primitive digit arithmetic */
3212
3213 /* {{{ s_mp_add_d(mp, d) */
3214
3215 /* Add d to |mp| in place                                                 */
3216 mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
3217 {
3218 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3219   mp_word   w, k = 0;
3220   mp_size   ix = 1;
3221
3222   w = (mp_word)DIGIT(mp, 0) + d;
3223   DIGIT(mp, 0) = ACCUM(w);
3224   k = CARRYOUT(w);
3225
3226   while(ix < USED(mp) && k) {
3227     w = (mp_word)DIGIT(mp, ix) + k;
3228     DIGIT(mp, ix) = ACCUM(w);
3229     k = CARRYOUT(w);
3230     ++ix;
3231   }
3232
3233   if(k != 0) {
3234     mp_err  res;
3235
3236     if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3237       return res;
3238
3239     DIGIT(mp, ix) = (mp_digit)k;
3240   }
3241
3242   return MP_OKAY;
3243 #else
3244   mp_digit * pmp = MP_DIGITS(mp);
3245   mp_digit sum, mp_i, carry = 0;
3246   mp_err   res = MP_OKAY;
3247   int used = (int)MP_USED(mp);
3248
3249   mp_i = *pmp;
3250   *pmp++ = sum = d + mp_i;
3251   carry = (sum < d);
3252   while (carry && --used > 0) {
3253     mp_i = *pmp;
3254     *pmp++ = sum = carry + mp_i;
3255     carry = !sum;
3256   }
3257   if (carry && !used) {
3258     /* mp is growing */
3259     used = MP_USED(mp);
3260     MP_CHECKOK( s_mp_pad(mp, used + 1) );
3261     MP_DIGIT(mp, used) = carry;
3262   }
3263 CLEANUP:
3264   return res;
3265 #endif
3266 } /* end s_mp_add_d() */
3267
3268 /* }}} */
3269
3270 /* {{{ s_mp_sub_d(mp, d) */
3271
3272 /* Subtract d from |mp| in place, assumes |mp| > d                        */
3273 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
3274 {
3275 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3276   mp_word   w, b = 0;
3277   mp_size   ix = 1;
3278
3279   /* Compute initial subtraction    */
3280   w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3281   b = CARRYOUT(w) ? 0 : 1;
3282   DIGIT(mp, 0) = ACCUM(w);
3283
3284   /* Propagate borrows leftward     */
3285   while(b && ix < USED(mp)) {
3286     w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3287     b = CARRYOUT(w) ? 0 : 1;
3288     DIGIT(mp, ix) = ACCUM(w);
3289     ++ix;
3290   }
3291
3292   /* Remove leading zeroes          */
3293   s_mp_clamp(mp);
3294
3295   /* If we have a borrow out, it's a violation of the input invariant */
3296   if(b)
3297     return MP_RANGE;
3298   else
3299     return MP_OKAY;
3300 #else
3301   mp_digit *pmp = MP_DIGITS(mp);
3302   mp_digit mp_i, diff, borrow;
3303   mp_size  used = MP_USED(mp);
3304
3305   mp_i = *pmp;
3306   *pmp++ = diff = mp_i - d;
3307   borrow = (diff > mp_i);
3308   while (borrow && --used) {
3309     mp_i = *pmp;
3310     *pmp++ = diff = mp_i - borrow;
3311     borrow = (diff > mp_i);
3312   }
3313   s_mp_clamp(mp);
3314   return (borrow && !used) ? MP_RANGE : MP_OKAY;
3315 #endif
3316 } /* end s_mp_sub_d() */
3317
3318 /* }}} */
3319
3320 /* {{{ s_mp_mul_d(a, d) */
3321
3322 /* Compute a = a * d, single digit multiplication                         */
3323 mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
3324 {
3325   mp_err  res;
3326   mp_size used;
3327   int     pow;
3328
3329   if (!d) {
3330     mp_zero(a);
3331     return MP_OKAY;
3332   }
3333   if (d == 1)
3334     return MP_OKAY;
3335   if (0 <= (pow = s_mp_ispow2d(d))) {
3336     return s_mp_mul_2d(a, (mp_digit)pow);
3337   }
3338
3339   used = MP_USED(a);
3340   MP_CHECKOK( s_mp_pad(a, used + 1) );
3341
3342   s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3343
3344   s_mp_clamp(a);
3345
3346 CLEANUP:
3347   return res;
3348
3349 } /* end s_mp_mul_d() */
3350
3351 /* }}} */
3352
3353 /* {{{ s_mp_div_d(mp, d, r) */
3354
3355 /*
3356   s_mp_div_d(mp, d, r)
3357
3358   Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3359   single digit d.  If r is null, the remainder will be discarded.
3360  */
3361
3362 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3363 {
3364 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3365   mp_word   w = 0, q;
3366 #else
3367   mp_digit  w, q;
3368 #endif
3369   int       ix;
3370   mp_err    res;
3371   mp_int    quot;
3372   mp_int    rem;
3373
3374   if(d == 0)
3375     return MP_RANGE;
3376   if (d == 1) {
3377     if (r)
3378       *r = 0;
3379     return MP_OKAY;
3380   }
3381   /* could check for power of 2 here, but mp_div_d does that. */
3382   if (MP_USED(mp) == 1) {
3383     mp_digit n   = MP_DIGIT(mp,0);
3384     mp_digit rem;
3385
3386     q   = n / d;
3387     rem = n % d;
3388     MP_DIGIT(mp,0) = q;
3389     if (r)
3390       *r = rem;
3391     return MP_OKAY;
3392   }
3393
3394   MP_DIGITS(&rem)  = 0;
3395   MP_DIGITS(&quot) = 0;
3396   /* Make room for the quotient */
3397   MP_CHECKOK( mp_init_size(&quot, USED(mp)) );
3398
3399 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3400   for(ix = USED(mp) - 1; ix >= 0; ix--) {
3401     w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3402
3403     if(w >= d) {
3404       q = w / d;
3405       w = w % d;
3406     } else {
3407       q = 0;
3408     }
3409
3410     s_mp_lshd(&quot, 1);
3411     DIGIT(&quot, 0) = (mp_digit)q;
3412   }
3413 #else
3414   {
3415     mp_digit p;
3416 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3417     mp_digit norm;
3418 #endif
3419
3420     MP_CHECKOK( mp_init_copy(&rem, mp) );
3421
3422 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3423     MP_DIGIT(&quot, 0) = d;
3424     MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
3425     if (norm)
3426       d <<= norm;
3427     MP_DIGIT(&quot, 0) = 0;
3428 #endif
3429
3430     p = 0;
3431     for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3432       w = DIGIT(&rem, ix);
3433
3434       if (p) {
3435         MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
3436       } else if (w >= d) {
3437         q = w / d;
3438         w = w % d;
3439       } else {
3440         q = 0;
3441       }
3442
3443       MP_CHECKOK( s_mp_lshd(&quot, 1) );
3444       DIGIT(&quot, 0) = q;
3445       p = w;
3446     }
3447 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3448     if (norm)
3449       w >>= norm;
3450 #endif
3451   }
3452 #endif
3453
3454   /* Deliver the remainder, if desired */
3455   if(r)
3456     *r = (mp_digit)w;
3457
3458   s_mp_clamp(&quot);
3459   mp_exch(&quot, mp);
3460 CLEANUP:
3461   mp_clear(&quot);
3462   mp_clear(&rem);
3463
3464   return res;
3465 } /* end s_mp_div_d() */
3466
3467 /* }}} */
3468
3469
3470 /* }}} */
3471
3472 /* {{{ Primitive full arithmetic */
3473
3474 /* {{{ s_mp_add(a, b) */
3475
3476 /* Compute a = |a| + |b|                                                  */
3477 mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
3478 {
3479 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3480   mp_word   w = 0;
3481 #else
3482   mp_digit  d, sum, carry = 0;
3483 #endif
3484   mp_digit *pa, *pb;
3485   mp_size   ix;
3486   mp_size   used;
3487   mp_err    res;
3488
3489   /* Make sure a has enough precision for the output value */
3490   if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3491     return res;
3492
3493   /*
3494     Add up all digits up to the precision of b.  If b had initially
3495     the same precision as a, or greater, we took care of it by the
3496     padding step above, so there is no problem.  If b had initially
3497     less precision, we'll have to make sure the carry out is duly
3498     propagated upward among the higher-order digits of the sum.
3499    */
3500   pa = MP_DIGITS(a);
3501   pb = MP_DIGITS(b);
3502   used = MP_USED(b);
3503   for(ix = 0; ix < used; ix++) {
3504 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3505     w = w + *pa + *pb++;
3506     *pa++ = ACCUM(w);
3507     w = CARRYOUT(w);
3508 #else
3509     d = *pa;
3510     sum = d + *pb++;
3511     d = (sum < d);                      /* detect overflow */
3512     *pa++ = sum += carry;
3513     carry = d + (sum < carry);          /* detect overflow */
3514 #endif
3515   }
3516
3517   /* If we run out of 'b' digits before we're actually done, make
3518      sure the carries get propagated upward...
3519    */
3520   used = MP_USED(a);
3521 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3522   while (w && ix < used) {
3523     w = w + *pa;
3524     *pa++ = ACCUM(w);
3525     w = CARRYOUT(w);
3526     ++ix;
3527   }
3528 #else
3529   while (carry && ix < used) {
3530     sum = carry + *pa;
3531     *pa++ = sum;
3532     carry = !sum;
3533     ++ix;
3534   }
3535 #endif
3536
3537   /* If there's an overall carry out, increase precision and include
3538      it.  We could have done this initially, but why touch the memory
3539      allocator unless we're sure we have to?
3540    */
3541 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3542   if (w) {
3543     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3544       return res;
3545
3546     DIGIT(a, ix) = (mp_digit)w;
3547   }
3548 #else
3549   if (carry) {
3550     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3551       return res;
3552
3553     DIGIT(a, used) = carry;
3554   }
3555 #endif
3556
3557   return MP_OKAY;
3558 } /* end s_mp_add() */
3559
3560 /* }}} */
3561
3562 /* Compute c = |a| + |b|         */ /* magnitude addition      */
3563 mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3564 {
3565   mp_digit *pa, *pb, *pc;
3566 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3567   mp_word   w = 0;
3568 #else
3569   mp_digit  sum, carry = 0, d;
3570 #endif
3571   mp_size   ix;
3572   mp_size   used;
3573   mp_err    res;
3574
3575   MP_SIGN(c) = MP_SIGN(a);
3576   if (MP_USED(a) < MP_USED(b)) {
3577     const mp_int *xch = a;
3578     a = b;
3579     b = xch;
3580   }
3581
3582   /* Make sure a has enough precision for the output value */
3583   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3584     return res;
3585
3586   /*
3587     Add up all digits up to the precision of b.  If b had initially
3588     the same precision as a, or greater, we took care of it by the
3589     exchange step above, so there is no problem.  If b had initially
3590     less precision, we'll have to make sure the carry out is duly
3591     propagated upward among the higher-order digits of the sum.
3592    */
3593   pa = MP_DIGITS(a);
3594   pb = MP_DIGITS(b);
3595   pc = MP_DIGITS(c);
3596   used = MP_USED(b);
3597   for (ix = 0; ix < used; ix++) {
3598 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3599     w = w + *pa++ + *pb++;
3600     *pc++ = ACCUM(w);
3601     w = CARRYOUT(w);
3602 #else
3603     d = *pa++;
3604     sum = d + *pb++;
3605     d = (sum < d);                      /* detect overflow */
3606     *pc++ = sum += carry;
3607     carry = d + (sum < carry);          /* detect overflow */
3608 #endif
3609   }
3610
3611   /* If we run out of 'b' digits before we're actually done, make
3612      sure the carries get propagated upward...
3613    */
3614   for (used = MP_USED(a); ix < used; ++ix) {
3615 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3616     w = w + *pa++;
3617     *pc++ = ACCUM(w);
3618     w = CARRYOUT(w);
3619 #else
3620     *pc++ = sum = carry + *pa++;
3621     carry = (sum < carry);
3622 #endif
3623   }
3624
3625   /* If there's an overall carry out, increase precision and include
3626      it.  We could have done this initially, but why touch the memory
3627      allocator unless we're sure we have to?
3628    */
3629 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3630   if (w) {
3631     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3632       return res;
3633
3634     DIGIT(c, used) = (mp_digit)w;
3635     ++used;
3636   }
3637 #else
3638   if (carry) {
3639     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3640       return res;
3641
3642     DIGIT(c, used) = carry;
3643     ++used;
3644   }
3645 #endif
3646   MP_USED(c) = used;
3647   return MP_OKAY;
3648 }
3649 /* {{{ s_mp_add_offset(a, b, offset) */
3650
3651 /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3652 mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3653 {
3654 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3655   mp_word   w, k = 0;
3656 #else
3657   mp_digit  d, sum, carry = 0;
3658 #endif
3659   mp_size   ib;
3660   mp_size   ia;
3661   mp_size   lim;
3662   mp_err    res;
3663
3664   /* Make sure a has enough precision for the output value */
3665   lim = MP_USED(b) + offset;
3666   if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3667     return res;
3668
3669   /*
3670     Add up all digits up to the precision of b.  If b had initially
3671     the same precision as a, or greater, we took care of it by the
3672     padding step above, so there is no problem.  If b had initially
3673     less precision, we'll have to make sure the carry out is duly
3674     propagated upward among the higher-order digits of the sum.
3675    */
3676   lim = USED(b);
3677   for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
3678 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3679     w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3680     DIGIT(a, ia) = ACCUM(w);
3681     k = CARRYOUT(w);
3682 #else
3683     d = MP_DIGIT(a, ia);
3684     sum = d + MP_DIGIT(b, ib);
3685     d = (sum < d);
3686     MP_DIGIT(a,ia) = sum += carry;
3687     carry = d + (sum < carry);
3688 #endif
3689   }
3690
3691   /* If we run out of 'b' digits before we're actually done, make
3692      sure the carries get propagated upward...
3693    */
3694 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3695   for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3696     w = (mp_word)DIGIT(a, ia) + k;
3697     DIGIT(a, ia) = ACCUM(w);
3698     k = CARRYOUT(w);
3699   }
3700 #else
3701   for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3702     d = MP_DIGIT(a, ia);
3703     MP_DIGIT(a,ia) = sum = d + carry;
3704     carry = (sum < d);
3705   }
3706 #endif
3707
3708   /* If there's an overall carry out, increase precision and include
3709      it.  We could have done this initially, but why touch the memory
3710      allocator unless we're sure we have to?
3711    */
3712 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3713   if(k) {
3714     if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3715       return res;
3716
3717     DIGIT(a, ia) = (mp_digit)k;
3718   }
3719 #else
3720   if (carry) {
3721     if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3722       return res;
3723
3724     DIGIT(a, lim) = carry;
3725   }
3726 #endif
3727   s_mp_clamp(a);
3728
3729   return MP_OKAY;
3730
3731 } /* end s_mp_add_offset() */
3732
3733 /* }}} */
3734
3735 /* {{{ s_mp_sub(a, b) */
3736
3737 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3738 mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
3739 {
3740   mp_digit *pa, *pb, *limit;
3741 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3742   mp_sword  w = 0;
3743 #else
3744   mp_digit  d, diff, borrow = 0;
3745 #endif
3746
3747   /*
3748     Subtract and propagate borrow.  Up to the precision of b, this
3749     accounts for the digits of b; after that, we just make sure the
3750     carries get to the right place.  This saves having to pad b out to
3751     the precision of a just to make the loops work right...
3752    */
3753   pa = MP_DIGITS(a);
3754   pb = MP_DIGITS(b);
3755   limit = pb + MP_USED(b);
3756   while (pb < limit) {
3757 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3758     w = w + *pa - *pb++;
3759     *pa++ = ACCUM(w);
3760     w >>= MP_DIGIT_BIT;
3761 #else
3762     d = *pa;
3763     diff = d - *pb++;
3764     d = (diff > d);                             /* detect borrow */
3765     if (borrow && --diff == MP_DIGIT_MAX)
3766       ++d;
3767     *pa++ = diff;
3768     borrow = d;
3769 #endif
3770   }
3771   limit = MP_DIGITS(a) + MP_USED(a);
3772 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3773   while (w && pa < limit) {
3774     w = w + *pa;
3775     *pa++ = ACCUM(w);
3776     w >>= MP_DIGIT_BIT;
3777   }
3778 #else
3779   while (borrow && pa < limit) {
3780     d = *pa;
3781     *pa++ = diff = d - borrow;
3782     borrow = (diff > d);
3783   }
3784 #endif
3785
3786   /* Clobber any leading zeroes we created    */
3787   s_mp_clamp(a);
3788
3789   /*
3790      If there was a borrow out, then |b| > |a| in violation
3791      of our input invariant.  We've already done the work,
3792      but we'll at least complain about it...
3793    */
3794 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3795   return w ? MP_RANGE : MP_OKAY;
3796 #else
3797   return borrow ? MP_RANGE : MP_OKAY;
3798 #endif
3799 } /* end s_mp_sub() */
3800
3801 /* }}} */
3802
3803 /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
3804 mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3805 {
3806   mp_digit *pa, *pb, *pc;
3807 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3808   mp_sword  w = 0;
3809 #else
3810   mp_digit  d, diff, borrow = 0;
3811 #endif
3812   int       ix, limit;
3813   mp_err    res;
3814
3815   MP_SIGN(c) = MP_SIGN(a);
3816
3817   /* Make sure a has enough precision for the output value */
3818   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3819     return res;
3820
3821   /*
3822     Subtract and propagate borrow.  Up to the precision of b, this
3823     accounts for the digits of b; after that, we just make sure the
3824     carries get to the right place.  This saves having to pad b out to
3825     the precision of a just to make the loops work right...
3826    */
3827   pa = MP_DIGITS(a);
3828   pb = MP_DIGITS(b);
3829   pc = MP_DIGITS(c);
3830   limit = MP_USED(b);
3831   for (ix = 0; ix < limit; ++ix) {
3832 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3833     w = w + *pa++ - *pb++;
3834     *pc++ = ACCUM(w);
3835     w >>= MP_DIGIT_BIT;
3836 #else
3837     d = *pa++;
3838     diff = d - *pb++;
3839     d = (diff > d);
3840     if (borrow && --diff == MP_DIGIT_MAX)
3841       ++d;
3842     *pc++ = diff;
3843     borrow = d;
3844 #endif
3845   }
3846   for (limit = MP_USED(a); ix < limit; ++ix) {
3847 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3848     w = w + *pa++;
3849     *pc++ = ACCUM(w);
3850     w >>= MP_DIGIT_BIT;
3851 #else
3852     d = *pa++;
3853     *pc++ = diff = d - borrow;
3854     borrow = (diff > d);
3855 #endif
3856   }
3857
3858   /* Clobber any leading zeroes we created    */
3859   MP_USED(c) = ix;
3860   s_mp_clamp(c);
3861
3862   /*
3863      If there was a borrow out, then |b| > |a| in violation
3864      of our input invariant.  We've already done the work,
3865      but we'll at least complain about it...
3866    */
3867 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3868   return w ? MP_RANGE : MP_OKAY;
3869 #else
3870   return borrow ? MP_RANGE : MP_OKAY;
3871 #endif
3872 }
3873 /* {{{ s_mp_mul(a, b) */
3874
3875 /* Compute a = |a| * |b|                                                  */
3876 mp_err   s_mp_mul(mp_int *a, const mp_int *b)
3877 {
3878   return mp_mul(a, b, a);
3879 } /* end s_mp_mul() */
3880
3881 /* }}} */
3882
3883 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
3884 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
3885 #define MP_MUL_DxD(a, b, Phi, Plo) \
3886   { unsigned long long product = (unsigned long long)a * b; \
3887     Plo = (mp_digit)product; \
3888     Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
3889 #elif defined(OSF1)
3890 #define MP_MUL_DxD(a, b, Phi, Plo) \
3891   { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
3892     Phi = asm ("umulh %a0, %a1, %v0", a, b); }
3893 #else
3894 #define MP_MUL_DxD(a, b, Phi, Plo) \
3895   { mp_digit a0b1, a1b0; \
3896     Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
3897     Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
3898     a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
3899     a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
3900     a1b0 += a0b1; \
3901     Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
3902     if (a1b0 < a0b1)  \
3903       Phi += MP_HALF_RADIX; \
3904     a1b0 <<= MP_HALF_DIGIT_BIT; \
3905     Plo += a1b0; \
3906     if (Plo < a1b0) \
3907       ++Phi; \
3908   }
3909 #endif
3910
3911 #if !defined(MP_ASSEMBLY_MULTIPLY)
3912 /* c = a * b */
3913 void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3914 {
3915 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3916   mp_digit   d = 0;
3917
3918   /* Inner product:  Digits of a */
3919   while (a_len--) {
3920     mp_word w = ((mp_word)b * *a++) + d;
3921     *c++ = ACCUM(w);
3922     d = CARRYOUT(w);
3923   }
3924   *c = d;
3925 #else
3926   mp_digit carry = 0;
3927   while (a_len--) {
3928     mp_digit a_i = *a++;
3929     mp_digit a0b0, a1b1;
3930
3931     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3932
3933     a0b0 += carry;
3934     if (a0b0 < carry)
3935       ++a1b1;
3936     *c++ = a0b0;
3937     carry = a1b1;
3938   }
3939   *c = carry;
3940 #endif
3941 }
3942
3943 /* c += a * b */
3944 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
3945                               mp_digit *c)
3946 {
3947 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3948   mp_digit   d = 0;
3949
3950   /* Inner product:  Digits of a */
3951   while (a_len--) {
3952     mp_word w = ((mp_word)b * *a++) + *c + d;
3953     *c++ = ACCUM(w);
3954     d = CARRYOUT(w);
3955   }
3956   *c = d;
3957 #else
3958   mp_digit carry = 0;
3959   while (a_len--) {
3960     mp_digit a_i = *a++;
3961     mp_digit a0b0, a1b1;
3962
3963     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3964
3965     a0b0 += carry;
3966     if (a0b0 < carry)
3967       ++a1b1;
3968     a0b0 += a_i = *c;
3969     if (a0b0 < a_i)
3970       ++a1b1;
3971     *c++ = a0b0;
3972     carry = a1b1;
3973   }
3974   *c = carry;
3975 #endif
3976 }
3977
3978 /* Presently, this is only used by the Montgomery arithmetic code. */
3979 /* c += a * b */
3980 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3981 {
3982 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3983   mp_digit   d = 0;
3984
3985   /* Inner product:  Digits of a */
3986   while (a_len--) {
3987     mp_word w = ((mp_word)b * *a++) + *c + d;
3988     *c++ = ACCUM(w);
3989     d = CARRYOUT(w);
3990   }
3991
3992   while (d) {
3993     mp_word w = (mp_word)*c + d;
3994     *c++ = ACCUM(w);
3995     d = CARRYOUT(w);
3996   }
3997 #else
3998   mp_digit carry = 0;
3999   while (a_len--) {
4000     mp_digit a_i = *a++;
4001     mp_digit a0b0, a1b1;
4002
4003     MP_MUL_DxD(a_i, b, a1b1, a0b0);
4004
4005     a0b0 += carry;
4006     if (a0b0 < carry)
4007       ++a1b1;
4008
4009     a0b0 += a_i = *c;
4010     if (a0b0 < a_i)
4011       ++a1b1;
4012
4013     *c++ = a0b0;
4014     carry = a1b1;
4015   }
4016   while (carry) {
4017     mp_digit c_i = *c;
4018     carry += c_i;
4019     *c++ = carry;
4020     carry = carry < c_i;
4021   }
4022 #endif
4023 }
4024 #endif
4025
4026 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4027 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4028 #define MP_SQR_D(a, Phi, Plo) \
4029   { unsigned long long square = (unsigned long long)a * a; \
4030     Plo = (mp_digit)square; \
4031     Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
4032 #elif defined(OSF1)
4033 #define MP_SQR_D(a, Phi, Plo) \
4034   { Plo = asm ("mulq  %a0, %a0, %v0", a);\
4035     Phi = asm ("umulh %a0, %a0, %v0", a); }
4036 #else
4037 #define MP_SQR_D(a, Phi, Plo) \
4038   { mp_digit Pmid; \
4039     Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
4040     Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4041     Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4042     Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
4043     Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
4044     Plo += Pmid;  \
4045     if (Plo < Pmid)  \
4046       ++Phi;  \
4047   }
4048 #endif
4049
4050 #if !defined(MP_ASSEMBLY_SQUARE)
4051 /* Add the squares of the digits of a to the digits of b. */
4052 void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4053 {
4054 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4055   mp_word  w;
4056   mp_digit d;
4057   mp_size  ix;
4058
4059   w  = 0;
4060 #define ADD_SQUARE(n) \
4061     d = pa[n]; \
4062     w += (d * (mp_word)d) + ps[2*n]; \
4063     ps[2*n] = ACCUM(w); \
4064     w = (w >> DIGIT_BIT) + ps[2*n+1]; \
4065     ps[2*n+1] = ACCUM(w); \
4066     w = (w >> DIGIT_BIT)
4067
4068   for (ix = a_len; ix >= 4; ix -= 4) {
4069     ADD_SQUARE(0);
4070     ADD_SQUARE(1);
4071     ADD_SQUARE(2);
4072     ADD_SQUARE(3);
4073     pa += 4;
4074     ps += 8;
4075   }
4076   if (ix) {
4077     ps += 2*ix;
4078     pa += ix;
4079     switch (ix) {
4080     case 3: ADD_SQUARE(-3); /* FALLTHRU */
4081     case 2: ADD_SQUARE(-2); /* FALLTHRU */
4082     case 1: ADD_SQUARE(-1); /* FALLTHRU */
4083     case 0: break;
4084     }
4085   }
4086   while (w) {
4087     w += *ps;
4088     *ps++ = ACCUM(w);
4089     w = (w >> DIGIT_BIT);
4090   }
4091 #else
4092   mp_digit carry = 0;
4093   while (a_len--) {
4094     mp_digit a_i = *pa++;
4095     mp_digit a0a0, a1a1;
4096
4097     MP_SQR_D(a_i, a1a1, a0a0);
4098
4099     /* here a1a1 and a0a0 constitute a_i ** 2 */
4100     a0a0 += carry;
4101     if (a0a0 < carry)
4102       ++a1a1;
4103
4104     /* now add to ps */
4105     a0a0 += a_i = *ps;
4106     if (a0a0 < a_i)
4107       ++a1a1;
4108     *ps++ = a0a0;
4109     a1a1 += a_i = *ps;
4110     carry = (a1a1 < a_i);
4111     *ps++ = a1a1;
4112   }
4113   while (carry) {
4114     mp_digit s_i = *ps;
4115     carry += s_i;
4116     *ps++ = carry;
4117     carry = carry < s_i;
4118   }
4119 #endif
4120 }
4121 #endif
4122
4123 #if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
4124 && !defined(MP_ASSEMBLY_DIV_2DX1D)
4125 /*
4126 ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4127 ** so its high bit is 1.   This code is from NSPR.
4128 */
4129 mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4130                        mp_digit *qp, mp_digit *rp)
4131 {
4132     mp_digit d1, d0, q1, q0;
4133     mp_digit r1, r0, m;
4134
4135     d1 = divisor >> MP_HALF_DIGIT_BIT;
4136     d0 = divisor & MP_HALF_DIGIT_MAX;
4137     r1 = Nhi % d1;
4138     q1 = Nhi / d1;
4139     m = q1 * d0;
4140     r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4141     if (r1 < m) {
4142         q1--, r1 += divisor;
4143         if (r1 >= divisor && r1 < m) {
4144             q1--, r1 += divisor;
4145         }
4146     }
4147     r1 -= m;
4148     r0 = r1 % d1;
4149     q0 = r1 / d1;
4150     m = q0 * d0;
4151     r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4152     if (r0 < m) {
4153         q0--, r0 += divisor;
4154         if (r0 >= divisor && r0 < m) {
4155             q0--, r0 += divisor;
4156         }
4157     }
4158     if (qp)
4159         *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4160     if (rp)
4161         *rp = r0 - m;
4162     return MP_OKAY;
4163 }
4164 #endif
4165
4166 #if MP_SQUARE
4167 /* {{{ s_mp_sqr(a) */
4168
4169 mp_err   s_mp_sqr(mp_int *a)
4170 {
4171   mp_err   res;
4172   mp_int   tmp;
4173
4174   if((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY)
4175     return res;
4176   res = mp_sqr(a, &tmp);
4177   if (res == MP_OKAY) {
4178     s_mp_exch(&tmp, a);
4179   }
4180   mp_clear(&tmp);
4181   return res;
4182 }
4183
4184 /* }}} */
4185 #endif
4186
4187 /* {{{ s_mp_div(a, b) */
4188
4189 /*
4190   s_mp_div(a, b)
4191
4192   Compute a = a / b and b = a mod b.  Assumes b > a.
4193  */
4194
4195 mp_err   s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
4196                   mp_int *div,  /* i: divisor                */
4197                   mp_int *quot) /* i: 0;        o: quotient  */
4198 {
4199   mp_int   part, t;
4200 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4201   mp_word  q_msd;
4202 #else
4203   mp_digit q_msd;
4204 #endif
4205   mp_err   res;
4206   mp_digit d;
4207   mp_digit div_msd;
4208   int      ix;
4209
4210   if(mp_cmp_z(div) == 0)
4211     return MP_RANGE;
4212
4213   /* Shortcut if divisor is power of two */
4214   if((ix = s_mp_ispow2(div)) >= 0) {
4215     MP_CHECKOK( mp_copy(rem, quot) );
4216     s_mp_div_2d(quot, (mp_digit)ix);
4217     s_mp_mod_2d(rem,  (mp_digit)ix);
4218
4219     return MP_OKAY;
4220   }
4221
4222   DIGITS(&t) = 0;
4223   MP_SIGN(rem) = ZPOS;
4224   MP_SIGN(div) = ZPOS;
4225
4226   /* A working temporary for division     */
4227   MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem)));
4228
4229   /* Normalize to optimize guessing       */
4230   MP_CHECKOK( s_mp_norm(rem, div, &d) );
4231
4232   part = *rem;
4233
4234   /* Perform the division itself...woo!   */
4235   MP_USED(quot) = MP_ALLOC(quot);
4236
4237   /* Find a partial substring of rem which is at least div */
4238   /* If we didn't find one, we're finished dividing    */
4239   while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4240     int i;
4241     int unusedRem;
4242
4243     unusedRem = MP_USED(rem) - MP_USED(div);
4244     MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4245     MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
4246     MP_USED(&part)   = MP_USED(div);
4247     if (s_mp_cmp(&part, div) < 0) {
4248       -- unusedRem;
4249 #if MP_ARGCHK == 2
4250       assert(unusedRem >= 0);
4251 #endif
4252       -- MP_DIGITS(&part);
4253       ++ MP_USED(&part);
4254       ++ MP_ALLOC(&part);
4255     }
4256
4257     /* Compute a guess for the next quotient digit       */
4258     q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4259     div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4260     if (q_msd >= div_msd) {
4261       q_msd = 1;
4262     } else if (MP_USED(&part) > 1) {
4263 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4264       q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
4265       q_msd /= div_msd;
4266       if (q_msd == RADIX)
4267         --q_msd;
4268 #else
4269       mp_digit r;
4270       MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4271                                   div_msd, &q_msd, &r) );
4272 #endif
4273     } else {
4274       q_msd = 0;
4275     }
4276 #if MP_ARGCHK == 2
4277     assert(q_msd > 0); /* This case should never occur any more. */
4278 #endif
4279     if (q_msd <= 0)
4280       break;
4281
4282     /* See what that multiplies out to                   */
4283     mp_copy(div, &t);
4284     MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
4285
4286     /*
4287        If it's too big, back it off.  We should not have to do this
4288        more than once, or, in rare cases, twice.  Knuth describes a
4289        method by which this could be reduced to a maximum of once, but
4290        I didn't implement that here.
4291      * When using s_mpv_div_2dx1d, we may have to do this 3 times.
4292      */
4293     for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4294       --q_msd;
4295       s_mp_sub(&t, div);        /* t -= div */
4296     }
4297     if (i < 0) {
4298       res = MP_RANGE;
4299       goto CLEANUP;
4300     }
4301
4302     /* At this point, q_msd should be the right next digit   */
4303     MP_CHECKOK( s_mp_sub(&part, &t) );  /* part -= t */
4304     s_mp_clamp(rem);
4305
4306     /*
4307       Include the digit in the quotient.  We allocated enough memory
4308       for any quotient we could ever possibly get, so we should not
4309       have to check for failures here
4310      */
4311     MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
4312   }
4313
4314   /* Denormalize remainder                */
4315   if (d) {
4316     s_mp_div_2d(rem, d);
4317   }
4318
4319   s_mp_clamp(quot);
4320
4321 CLEANUP:
4322   mp_clear(&t);
4323
4324   return res;
4325
4326 } /* end s_mp_div() */
4327
4328
4329 /* }}} */
4330
4331 /* {{{ s_mp_2expt(a, k) */
4332
4333 mp_err   s_mp_2expt(mp_int *a, mp_digit k)
4334 {
4335   mp_err    res;
4336   mp_size   dig, bit;
4337
4338   dig = k / DIGIT_BIT;
4339   bit = k % DIGIT_BIT;
4340
4341   mp_zero(a);
4342   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4343     return res;
4344
4345   DIGIT(a, dig) |= ((mp_digit)1 << bit);
4346
4347   return MP_OKAY;
4348
4349 } /* end s_mp_2expt() */
4350
4351 /* }}} */
4352
4353 /* {{{ s_mp_reduce(x, m, mu) */
4354
4355 /*
4356   Compute Barrett reduction, x (mod m), given a precomputed value for
4357   mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4358   faster than straight division, when many reductions by the same
4359   value of m are required (such as in modular exponentiation).  This
4360   can nearly halve the time required to do modular exponentiation,
4361   as compared to using the full integer divide to reduce.
4362
4363   This algorithm was derived from the _Handbook of Applied
4364   Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4365   pp. 603-604.
4366  */
4367
4368 mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4369 {
4370   mp_int   q;
4371   mp_err   res;
4372
4373   if((res = mp_init_copy(&q, x)) != MP_OKAY)
4374     return res;
4375
4376   s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
4377   s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
4378   s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
4379
4380   /* x = x mod b^(k+1), quick (no division) */
4381   s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4382
4383   /* q = q * m mod b^(k+1), quick (no division) */
4384   s_mp_mul(&q, m);
4385   s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4386
4387   /* x = x - q */
4388   if((res = mp_sub(x, &q, x)) != MP_OKAY)
4389     goto CLEANUP;
4390
4391   /* If x < 0, add b^(k+1) to it */
4392   if(mp_cmp_z(x) < 0) {
4393     mp_set(&q, 1);
4394     if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4395       goto CLEANUP;
4396     if((res = mp_add(x, &q, x)) != MP_OKAY)
4397       goto CLEANUP;
4398   }
4399
4400   /* Back off if it's too big */
4401   while(mp_cmp(x, m) >= 0) {
4402     if((res = s_mp_sub(x, m)) != MP_OKAY)
4403       break;
4404   }
4405
4406  CLEANUP:
4407   mp_clear(&q);
4408
4409   return res;
4410
4411 } /* end s_mp_reduce() */
4412
4413 /* }}} */
4414
4415 /* }}} */
4416
4417 /* {{{ Primitive comparisons */
4418
4419 /* {{{ s_mp_cmp(a, b) */
4420
4421 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4422 int      s_mp_cmp(const mp_int *a, const mp_int *b)
4423 {
4424   mp_size used_a = MP_USED(a);
4425   {
4426     mp_size used_b = MP_USED(b);
4427
4428     if (used_a > used_b)
4429       goto IS_GT;
4430     if (used_a < used_b)
4431       goto IS_LT;
4432   }
4433   {
4434     mp_digit *pa, *pb;
4435     mp_digit da = 0, db = 0;
4436
4437 #define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
4438
4439     pa = MP_DIGITS(a) + used_a;
4440     pb = MP_DIGITS(b) + used_a;
4441     while (used_a >= 4) {
4442       pa     -= 4;
4443       pb     -= 4;
4444       used_a -= 4;
4445       CMP_AB(3);
4446       CMP_AB(2);
4447       CMP_AB(1);
4448       CMP_AB(0);
4449     }
4450     while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4451       /* do nothing */;
4452 done:
4453     if (da > db)
4454       goto IS_GT;
4455     if (da < db)
4456       goto IS_LT;
4457   }
4458   return MP_EQ;
4459 IS_LT:
4460   return MP_LT;
4461 IS_GT:
4462   return MP_GT;
4463 } /* end s_mp_cmp() */
4464
4465 /* }}} */
4466
4467 /* {{{ s_mp_cmp_d(a, d) */
4468
4469 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4470 int      s_mp_cmp_d(const mp_int *a, mp_digit d)
4471 {
4472   if(USED(a) > 1)
4473     return MP_GT;
4474
4475   if(DIGIT(a, 0) < d)
4476     return MP_LT;
4477   else if(DIGIT(a, 0) > d)
4478     return MP_GT;
4479   else
4480     return MP_EQ;
4481
4482 } /* end s_mp_cmp_d() */
4483
4484 /* }}} */
4485
4486 /* {{{ s_mp_ispow2(v) */
4487
4488 /*
4489   Returns -1 if the value is not a power of two; otherwise, it returns
4490   k such that v = 2^k, i.e. lg(v).
4491  */
4492 int      s_mp_ispow2(const mp_int *v)
4493 {
4494   mp_digit d;
4495   int      extra = 0, ix;
4496
4497   ix = MP_USED(v) - 1;
4498   d = MP_DIGIT(v, ix); /* most significant digit of v */
4499
4500   extra = s_mp_ispow2d(d);
4501   if (extra < 0 || ix == 0)
4502     return extra;
4503
4504   while (--ix >= 0) {
4505     if (DIGIT(v, ix) != 0)
4506       return -1; /* not a power of two */
4507     extra += MP_DIGIT_BIT;
4508   }
4509
4510   return extra;
4511
4512 } /* end s_mp_ispow2() */
4513
4514 /* }}} */
4515
4516 /* {{{ s_mp_ispow2d(d) */
4517
4518 int      s_mp_ispow2d(mp_digit d)
4519 {
4520   if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
4521     int pow = 0;
4522 #if defined (MP_USE_UINT_DIGIT)
4523     if (d & 0xffff0000U)
4524       pow += 16;
4525     if (d & 0xff00ff00U)
4526       pow += 8;
4527     if (d & 0xf0f0f0f0U)
4528       pow += 4;
4529     if (d & 0xccccccccU)
4530       pow += 2;
4531     if (d & 0xaaaaaaaaU)
4532       pow += 1;
4533 #elif defined(MP_USE_LONG_LONG_DIGIT)
4534     if (d & 0xffffffff00000000ULL)
4535       pow += 32;
4536     if (d & 0xffff0000ffff0000ULL)
4537       pow += 16;
4538     if (d & 0xff00ff00ff00ff00ULL)
4539       pow += 8;
4540     if (d & 0xf0f0f0f0f0f0f0f0ULL)
4541       pow += 4;
4542     if (d & 0xccccccccccccccccULL)
4543       pow += 2;
4544     if (d & 0xaaaaaaaaaaaaaaaaULL)
4545       pow += 1;
4546 #elif defined(MP_USE_LONG_DIGIT)
4547     if (d & 0xffffffff00000000UL)
4548       pow += 32;
4549     if (d & 0xffff0000ffff0000UL)
4550       pow += 16;
4551     if (d & 0xff00ff00ff00ff00UL)
4552       pow += 8;
4553     if (d & 0xf0f0f0f0f0f0f0f0UL)
4554       pow += 4;
4555     if (d & 0xccccccccccccccccUL)
4556       pow += 2;
4557     if (d & 0xaaaaaaaaaaaaaaaaUL)
4558       pow += 1;
4559 #else
4560 #error "unknown type for mp_digit"
4561 #endif
4562     return pow;
4563   }
4564   return -1;
4565
4566 } /* end s_mp_ispow2d() */
4567
4568 /* }}} */
4569
4570 /* }}} */
4571
4572 /* {{{ Primitive I/O helpers */
4573
4574 /* {{{ s_mp_tovalue(ch, r) */
4575
4576 /*
4577   Convert the given character to its digit value, in the given radix.
4578   If the given character is not understood in the given radix, -1 is
4579   returned.  Otherwise the digit's numeric value is returned.
4580
4581   The results will be odd if you use a radix < 2 or > 62, you are
4582   expected to know what you're up to.
4583  */
4584 int      s_mp_tovalue(char ch, int r)
4585 {
4586   int    val, xch;
4587
4588   if(r > 36)
4589     xch = ch;
4590   else
4591     xch = toupper(ch);
4592
4593   if(isdigit(xch))
4594     val = xch - '0';
4595   else if(isupper(xch))
4596     val = xch - 'A' + 10;
4597   else if(islower(xch))
4598     val = xch - 'a' + 36;
4599   else if(xch == '+')
4600     val = 62;
4601   else if(xch == '/')
4602     val = 63;
4603   else
4604     return -1;
4605
4606   if(val < 0 || val >= r)
4607     return -1;
4608
4609   return val;
4610
4611 } /* end s_mp_tovalue() */
4612
4613 /* }}} */
4614
4615 /* {{{ s_mp_todigit(val, r, low) */
4616
4617 /*
4618   Convert val to a radix-r digit, if possible.  If val is out of range
4619   for r, returns zero.  Otherwise, returns an ASCII character denoting
4620   the value in the given radix.
4621
4622   The results may be odd if you use a radix < 2 or > 64, you are
4623   expected to know what you're doing.
4624  */
4625
4626 char     s_mp_todigit(mp_digit val, int r, int low)
4627 {
4628   char   ch;
4629
4630   if(val >= r)
4631     return 0;
4632
4633   ch = s_dmap_1[val];
4634
4635   if(r <= 36 && low)
4636     ch = tolower(ch);
4637
4638   return ch;
4639
4640 } /* end s_mp_todigit() */
4641
4642 /* }}} */
4643
4644 /* {{{ s_mp_outlen(bits, radix) */
4645
4646 /*
4647    Return an estimate for how long a string is needed to hold a radix
4648    r representation of a number with 'bits' significant bits, plus an
4649    extra for a zero terminator (assuming C style strings here)
4650  */
4651 int      s_mp_outlen(int bits, int r)
4652 {
4653   return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4654
4655 } /* end s_mp_outlen() */
4656
4657 /* }}} */
4658
4659 /* }}} */
4660
4661 /* {{{ mp_read_unsigned_octets(mp, str, len) */
4662 /* mp_read_unsigned_octets(mp, str, len)
4663    Read in a raw value (base 256) into the given mp_int
4664    No sign bit, number is positive.  Leading zeros ignored.
4665  */
4666
4667 mp_err
4668 mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
4669 {
4670   int            count;
4671   mp_err         res;
4672   mp_digit       d;
4673
4674   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
4675
4676   mp_zero(mp);
4677
4678   count = len % sizeof(mp_digit);
4679   if (count) {
4680     for (d = 0; count-- > 0; --len) {
4681       d = (d << 8) | *str++;
4682     }
4683     MP_DIGIT(mp, 0) = d;
4684   }
4685
4686   /* Read the rest of the digits */
4687   for(; len > 0; len -= sizeof(mp_digit)) {
4688     for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
4689       d = (d << 8) | *str++;
4690     }
4691     if (MP_EQ == mp_cmp_z(mp)) {
4692       if (!d)
4693         continue;
4694     } else {
4695       if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
4696         return res;
4697     }
4698     MP_DIGIT(mp, 0) = d;
4699   }
4700   return MP_OKAY;
4701 } /* end mp_read_unsigned_octets() */
4702 /* }}} */
4703
4704 /* {{{ mp_unsigned_octet_size(mp) */
4705 int
4706 mp_unsigned_octet_size(const mp_int *mp)
4707 {
4708   int  bytes;
4709   int  ix;
4710   mp_digit  d = 0;
4711
4712   ARGCHK(mp != NULL, MP_BADARG);
4713   ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
4714
4715   bytes = (USED(mp) * sizeof(mp_digit));
4716
4717   /* subtract leading zeros. */
4718   /* Iterate over each digit... */
4719   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4720     d = DIGIT(mp, ix);
4721     if (d)
4722         break;
4723     bytes -= sizeof(d);
4724   }
4725   if (!bytes)
4726     return 1;
4727
4728   /* Have MSD, check digit bytes, high order first */
4729   for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
4730     unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
4731     if (x)
4732         break;
4733     --bytes;
4734   }
4735   return bytes;
4736 } /* end mp_unsigned_octet_size() */
4737 /* }}} */
4738
4739 /* {{{ mp_to_unsigned_octets(mp, str) */
4740 /* output a buffer of big endian octets no longer than specified. */
4741 mp_err
4742 mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4743 {
4744   int  ix, pos = 0;
4745   int  bytes;
4746
4747   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4748
4749   bytes = mp_unsigned_octet_size(mp);
4750   ARGCHK(bytes <= maxlen, MP_BADARG);
4751
4752   /* Iterate over each digit... */
4753   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4754     mp_digit  d = DIGIT(mp, ix);
4755     int       jx;
4756
4757     /* Unpack digit bytes, high order first */
4758     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4759       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4760       if (!pos && !x)   /* suppress leading zeros */
4761         continue;
4762       str[pos++] = x;
4763     }
4764   }
4765   if (!pos)
4766     str[pos++] = 0;
4767   return pos;
4768 } /* end mp_to_unsigned_octets() */
4769 /* }}} */
4770
4771 /* {{{ mp_to_signed_octets(mp, str) */
4772 /* output a buffer of big endian octets no longer than specified. */
4773 mp_err
4774 mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4775 {
4776   int  ix, pos = 0;
4777   int  bytes;
4778
4779   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4780
4781   bytes = mp_unsigned_octet_size(mp);
4782   ARGCHK(bytes <= maxlen, MP_BADARG);
4783
4784   /* Iterate over each digit... */
4785   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4786     mp_digit  d = DIGIT(mp, ix);
4787     int       jx;
4788
4789     /* Unpack digit bytes, high order first */
4790     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4791       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4792       if (!pos) {
4793         if (!x)         /* suppress leading zeros */
4794           continue;
4795         if (x & 0x80) { /* add one leading zero to make output positive.  */
4796           ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
4797           if (bytes + 1 > maxlen)
4798             return MP_BADARG;
4799           str[pos++] = 0;
4800         }
4801       }
4802       str[pos++] = x;
4803     }
4804   }
4805   if (!pos)
4806     str[pos++] = 0;
4807   return pos;
4808 } /* end mp_to_signed_octets() */
4809 /* }}} */
4810
4811 /* {{{ mp_to_fixlen_octets(mp, str) */
4812 /* output a buffer of big endian octets exactly as long as requested. */
4813 mp_err
4814 mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
4815 {
4816   int  ix, pos = 0;
4817   int  bytes;
4818
4819   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4820
4821   bytes = mp_unsigned_octet_size(mp);
4822   ARGCHK(bytes <= length, MP_BADARG);
4823
4824   /* place any needed leading zeros */
4825   for (;length > bytes; --length) {
4826         *str++ = 0;
4827   }
4828
4829   /* Iterate over each digit... */
4830   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4831     mp_digit  d = DIGIT(mp, ix);
4832     int       jx;
4833
4834     /* Unpack digit bytes, high order first */
4835     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4836       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4837       if (!pos && !x)   /* suppress leading zeros */
4838         continue;
4839       str[pos++] = x;
4840     }
4841   }
4842   if (!pos)
4843     str[pos++] = 0;
4844   return MP_OKAY;
4845 } /* end mp_to_fixlen_octets() */
4846 /* }}} */
4847
4848
4849 /*------------------------------------------------------------------------*/
4850 /* HERE THERE BE DRAGONS                                                  */