security/nss/lib/freebl/ecl/ecl_gf.c

   1 /*
   2  * ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is the elliptic curve math library.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Sun Microsystems, Inc.
  19  * Portions created by the Initial Developer are Copyright (C) 2003
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *   Stephen Fung <fungstep@hotmail.com> and
  24  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either the GNU General Public License Version 2 or later (the "GPL"), or
  28  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 #include "mpi.h"
  41 #include "mp_gf2m.h"
  42 #include "ecl-priv.h"
  43 #include "mpi-priv.h"
  44 #include <stdlib.h>
  45
  46 /* Allocate memory for a new GFMethod object. */
  47 GFMethod *
  48 GFMethod_new()
  49 {
  50         mp_err res = MP_OKAY;
  51         GFMethod *meth;
  52         meth = (GFMethod *) malloc(sizeof(GFMethod));
  53         if (meth == NULL)
  54                 return NULL;
  55         meth->constructed = MP_YES;
  56         MP_DIGITS(&meth->irr) = 0;
  57         meth->extra_free = NULL;
  58         MP_CHECKOK(mp_init(&meth->irr));
  59
  60   CLEANUP:
  61         if (res != MP_OKAY) {
  62                 GFMethod_free(meth);
  63                 return NULL;
  64         }
  65         return meth;
  66 }
  67
  68 /* Construct a generic GFMethod for arithmetic over prime fields with
  69  * irreducible irr. */
  70 GFMethod *
  71 GFMethod_consGFp(const mp_int *irr)
  72 {
  73         mp_err res = MP_OKAY;
  74         GFMethod *meth = NULL;
  75
  76         meth = GFMethod_new();
  77         if (meth == NULL)
  78                 return NULL;
  79
  80         MP_CHECKOK(mp_copy(irr, &meth->irr));
  81         meth->irr_arr[0] = mpl_significant_bits(irr);
  82         meth->irr_arr[1] = meth->irr_arr[2] = meth->irr_arr[3] =
  83                 meth->irr_arr[4] = 0;
  84         switch(MP_USED(&meth->irr)) {
  85         /* maybe we need 1 and 2 words here as well?*/
  86         case 3:
  87                 meth->field_add = &ec_GFp_add_3;
  88                 meth->field_sub = &ec_GFp_sub_3;
  89                 break;
  90         case 4:
  91                 meth->field_add = &ec_GFp_add_4;
  92                 meth->field_sub = &ec_GFp_sub_4;
  93                 break;
  94         case 5:
  95                 meth->field_add = &ec_GFp_add_5;
  96                 meth->field_sub = &ec_GFp_sub_5;
  97                 break;
  98         case 6:
  99                 meth->field_add = &ec_GFp_add_6;
 100                 meth->field_sub = &ec_GFp_sub_6;
 101                 break;
 102         default:
 103                 meth->field_add = &ec_GFp_add;
 104                 meth->field_sub = &ec_GFp_sub;
 105         }
 106         meth->field_neg = &ec_GFp_neg;
 107         meth->field_mod = &ec_GFp_mod;
 108         meth->field_mul = &ec_GFp_mul;
 109         meth->field_sqr = &ec_GFp_sqr;
 110         meth->field_div = &ec_GFp_div;
 111         meth->field_enc = NULL;
 112         meth->field_dec = NULL;
 113         meth->extra1 = NULL;
 114         meth->extra2 = NULL;
 115         meth->extra_free = NULL;
 116
 117   CLEANUP:
 118         if (res != MP_OKAY) {
 119                 GFMethod_free(meth);
 120                 return NULL;
 121         }
 122         return meth;
 123 }
 124
 125 /* Construct a generic GFMethod for arithmetic over binary polynomial
 126  * fields with irreducible irr that has array representation irr_arr (see
 127  * ecl-priv.h for description of the representation).  If irr_arr is NULL,
 128  * then it is constructed from the bitstring representation. */
 129 GFMethod *
 130 GFMethod_consGF2m(const mp_int *irr, const unsigned int irr_arr[5])
 131 {
 132         mp_err res = MP_OKAY;
 133         int ret;
 134         GFMethod *meth = NULL;
 135
 136         meth = GFMethod_new();
 137         if (meth == NULL)
 138                 return NULL;
 139
 140         MP_CHECKOK(mp_copy(irr, &meth->irr));
 141         if (irr_arr != NULL) {
 142                 /* Irreducible polynomials are either trinomials or pentanomials. */
 143                 meth->irr_arr[0] = irr_arr[0];
 144                 meth->irr_arr[1] = irr_arr[1];
 145                 meth->irr_arr[2] = irr_arr[2];
 146                 if (irr_arr[2] > 0) {
 147                         meth->irr_arr[3] = irr_arr[3];
 148                         meth->irr_arr[4] = irr_arr[4];
 149                 } else {
 150                         meth->irr_arr[3] = meth->irr_arr[4] = 0;
 151                 }
 152         } else {
 153                 ret = mp_bpoly2arr(irr, meth->irr_arr, 5);
 154                 /* Irreducible polynomials are either trinomials or pentanomials. */
 155                 if ((ret != 5) && (ret != 3)) {
 156                         res = MP_UNDEF;
 157                         goto CLEANUP;
 158                 }
 159         }
 160         meth->field_add = &ec_GF2m_add;
 161         meth->field_neg = &ec_GF2m_neg;
 162         meth->field_sub = &ec_GF2m_add;
 163         meth->field_mod = &ec_GF2m_mod;
 164         meth->field_mul = &ec_GF2m_mul;
 165         meth->field_sqr = &ec_GF2m_sqr;
 166         meth->field_div = &ec_GF2m_div;
 167         meth->field_enc = NULL;
 168         meth->field_dec = NULL;
 169         meth->extra1 = NULL;
 170         meth->extra2 = NULL;
 171         meth->extra_free = NULL;
 172
 173   CLEANUP:
 174         if (res != MP_OKAY) {
 175                 GFMethod_free(meth);
 176                 return NULL;
 177         }
 178         return meth;
 179 }
 180
 181 /* Free the memory allocated (if any) to a GFMethod object. */
 182 void
 183 GFMethod_free(GFMethod *meth)
 184 {
 185         if (meth == NULL)
 186                 return;
 187         if (meth->constructed == MP_NO)
 188                 return;
 189         mp_clear(&meth->irr);
 190         if (meth->extra_free != NULL)
 191                 meth->extra_free(meth);
 192         free(meth);
 193 }
 194
 195 /* Wrapper functions for generic prime field arithmetic. */
 196
 197 /* Add two field elements.  Assumes that 0 <= a, b < meth->irr */
 198 mp_err
 199 ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
 200                    const GFMethod *meth)
 201 {
 202         /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */
 203         mp_err res;
 204
 205         if ((res = mp_add(a, b, r)) != MP_OKAY) {
 206                 return res;
 207         }
 208         if (mp_cmp(r, &meth->irr) >= 0) {
 209                 return mp_sub(r, &meth->irr, r);
 210         }
 211         return res;
 212 }
 213
 214 /* Negates a field element.  Assumes that 0 <= a < meth->irr */
 215 mp_err
 216 ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
 217 {
 218         /* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */
 219
 220         if (mp_cmp_z(a) == 0) {
 221                 mp_zero(r);
 222                 return MP_OKAY;
 223         }
 224         return mp_sub(&meth->irr, a, r);
 225 }
 226
 227 /* Subtracts two field elements.  Assumes that 0 <= a, b < meth->irr */
 228 mp_err
 229 ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
 230                    const GFMethod *meth)
 231 {
 232         mp_err res = MP_OKAY;
 233
 234         /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */
 235         res = mp_sub(a, b, r);
 236         if (res == MP_RANGE) {
 237                 MP_CHECKOK(mp_sub(b, a, r));
 238                 if (mp_cmp_z(r) < 0) {
 239                         MP_CHECKOK(mp_add(r, &meth->irr, r));
 240                 }
 241                 MP_CHECKOK(ec_GFp_neg(r, r, meth));
 242         }
 243         if (mp_cmp_z(r) < 0) {
 244                 MP_CHECKOK(mp_add(r, &meth->irr, r));
 245         }
 246   CLEANUP:
 247         return res;
 248 }
 249 /*
 250  * Inline adds for small curve lengths.
 251  */
 252 /* 3 words */
 253 mp_err
 254 ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
 255                         const GFMethod *meth)
 256 {
 257         mp_err res = MP_OKAY;
 258         mp_digit a0 = 0, a1 = 0, a2 = 0;
 259         mp_digit r0 = 0, r1 = 0, r2 = 0;
 260         mp_digit carry;
 261
 262         switch(MP_USED(a)) {
 263         case 3:
 264                 a2 = MP_DIGIT(a,2);
 265         case 2:
 266                 a1 = MP_DIGIT(a,1);
 267         case 1:
 268                 a0 = MP_DIGIT(a,0);
 269         }
 270         switch(MP_USED(b)) {
 271         case 3:
 272                 r2 = MP_DIGIT(b,2);
 273         case 2:
 274                 r1 = MP_DIGIT(b,1);
 275         case 1:
 276                 r0 = MP_DIGIT(b,0);
 277         }
 278
 279 #ifndef MPI_AMD64_ADD
 280         MP_ADD_CARRY(a0, r0, r0, 0,     carry);
 281         MP_ADD_CARRY(a1, r1, r1, carry, carry);
 282         MP_ADD_CARRY(a2, r2, r2, carry, carry);
 283 #else
 284         __asm__ (
 285                 "xorq   %3,%3           \n\t"
 286                 "addq   %4,%0           \n\t"
 287                 "adcq   %5,%1           \n\t"
 288                 "adcq   %6,%2           \n\t"
 289                 "adcq   $0,%3           \n\t"
 290                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
 291                 : "r" (a0), "r" (a1), "r" (a2),
 292                   "0" (r0), "1" (r1), "2" (r2)
 293                 : "%cc" );
 294 #endif
 295
 296         MP_CHECKOK(s_mp_pad(r, 3));
 297         MP_DIGIT(r, 2) = r2;
 298         MP_DIGIT(r, 1) = r1;
 299         MP_DIGIT(r, 0) = r0;
 300         MP_SIGN(r) = MP_ZPOS;
 301         MP_USED(r) = 3;
 302
 303         /* Do quick 'subract' if we've gone over
 304          * (add the 2's complement of the curve field) */
 305          a2 = MP_DIGIT(&meth->irr,2);
 306         if (carry ||  r2 >  a2 ||
 307                 ((r2 == a2) && mp_cmp(r,&meth->irr) != MP_LT)) {
 308                 a1 = MP_DIGIT(&meth->irr,1);
 309                 a0 = MP_DIGIT(&meth->irr,0);
 310 #ifndef MPI_AMD64_ADD
 311                 MP_SUB_BORROW(r0, a0, r0, 0,     carry);
 312                 MP_SUB_BORROW(r1, a1, r1, carry, carry);
 313                 MP_SUB_BORROW(r2, a2, r2, carry, carry);
 314 #else
 315                 __asm__ (
 316                         "subq   %3,%0           \n\t"
 317                         "sbbq   %4,%1           \n\t"
 318                         "sbbq   %5,%2           \n\t"
 319                         : "=r"(r0), "=r"(r1), "=r"(r2)
 320                         : "r" (a0), "r" (a1), "r" (a2),
 321                           "0" (r0), "1" (r1), "2" (r2)
 322                         : "%cc" );
 323 #endif
 324                 MP_DIGIT(r, 2) = r2;
 325                 MP_DIGIT(r, 1) = r1;
 326                 MP_DIGIT(r, 0) = r0;
 327         }
 328
 329         s_mp_clamp(r);
 330
 331   CLEANUP:
 332         return res;
 333 }
 334
 335 /* 4 words */
 336 mp_err
 337 ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
 338                         const GFMethod *meth)
 339 {
 340         mp_err res = MP_OKAY;
 341         mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
 342         mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
 343         mp_digit carry;
 344
 345         switch(MP_USED(a)) {
 346         case 4:
 347                 a3 = MP_DIGIT(a,3);
 348         case 3:
 349                 a2 = MP_DIGIT(a,2);
 350         case 2:
 351                 a1 = MP_DIGIT(a,1);
 352         case 1:
 353                 a0 = MP_DIGIT(a,0);
 354         }
 355         switch(MP_USED(b)) {
 356         case 4:
 357                 r3 = MP_DIGIT(b,3);
 358         case 3:
 359                 r2 = MP_DIGIT(b,2);
 360         case 2:
 361                 r1 = MP_DIGIT(b,1);
 362         case 1:
 363                 r0 = MP_DIGIT(b,0);
 364         }
 365
 366 #ifndef MPI_AMD64_ADD
 367         MP_ADD_CARRY(a0, r0, r0, 0,     carry);
 368         MP_ADD_CARRY(a1, r1, r1, carry, carry);
 369         MP_ADD_CARRY(a2, r2, r2, carry, carry);
 370         MP_ADD_CARRY(a3, r3, r3, carry, carry);
 371 #else
 372         __asm__ (
 373                 "xorq   %4,%4           \n\t"
 374                 "addq   %5,%0           \n\t"
 375                 "adcq   %6,%1           \n\t"
 376                 "adcq   %7,%2           \n\t"
 377                 "adcq   %8,%3           \n\t"
 378                 "adcq   $0,%4           \n\t"
 379                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry)
 380                 : "r" (a0), "r" (a1), "r" (a2), "r" (a3),
 381                   "0" (r0), "1" (r1), "2" (r2), "3" (r3)
 382                 : "%cc" );
 383 #endif
 384
 385         MP_CHECKOK(s_mp_pad(r, 4));
 386         MP_DIGIT(r, 3) = r3;
 387         MP_DIGIT(r, 2) = r2;
 388         MP_DIGIT(r, 1) = r1;
 389         MP_DIGIT(r, 0) = r0;
 390         MP_SIGN(r) = MP_ZPOS;
 391         MP_USED(r) = 4;
 392
 393         /* Do quick 'subract' if we've gone over
 394          * (add the 2's complement of the curve field) */
 395          a3 = MP_DIGIT(&meth->irr,3);
 396         if (carry ||  r3 >  a3 ||
 397                 ((r3 == a3) && mp_cmp(r,&meth->irr) != MP_LT)) {
 398                 a2 = MP_DIGIT(&meth->irr,2);
 399                 a1 = MP_DIGIT(&meth->irr,1);
 400                 a0 = MP_DIGIT(&meth->irr,0);
 401 #ifndef MPI_AMD64_ADD
 402                 MP_SUB_BORROW(r0, a0, r0, 0,     carry);
 403                 MP_SUB_BORROW(r1, a1, r1, carry, carry);
 404                 MP_SUB_BORROW(r2, a2, r2, carry, carry);
 405                 MP_SUB_BORROW(r3, a3, r3, carry, carry);
 406 #else
 407                 __asm__ (
 408                         "subq   %4,%0           \n\t"
 409                         "sbbq   %5,%1           \n\t"
 410                         "sbbq   %6,%2           \n\t"
 411                         "sbbq   %7,%3           \n\t"
 412                         : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
 413                         : "r" (a0), "r" (a1), "r" (a2), "r" (a3),
 414                           "0" (r0), "1" (r1), "2" (r2), "3" (r3)
 415                         : "%cc" );
 416 #endif
 417                 MP_DIGIT(r, 3) = r3;
 418                 MP_DIGIT(r, 2) = r2;
 419                 MP_DIGIT(r, 1) = r1;
 420                 MP_DIGIT(r, 0) = r0;
 421         }
 422
 423         s_mp_clamp(r);
 424
 425   CLEANUP:
 426         return res;
 427 }
 428
 429 /* 5 words */
 430 mp_err
 431 ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
 432                         const GFMethod *meth)
 433 {
 434         mp_err res = MP_OKAY;
 435         mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
 436         mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
 437         mp_digit carry;
 438
 439         switch(MP_USED(a)) {
 440         case 5:
 441                 a4 = MP_DIGIT(a,4);
 442         case 4:
 443                 a3 = MP_DIGIT(a,3);
 444         case 3:
 445                 a2 = MP_DIGIT(a,2);
 446         case 2:
 447                 a1 = MP_DIGIT(a,1);
 448         case 1:
 449                 a0 = MP_DIGIT(a,0);
 450         }
 451         switch(MP_USED(b)) {
 452         case 5:
 453                 r4 = MP_DIGIT(b,4);
 454         case 4:
 455                 r3 = MP_DIGIT(b,3);
 456         case 3:
 457                 r2 = MP_DIGIT(b,2);
 458         case 2:
 459                 r1 = MP_DIGIT(b,1);
 460         case 1:
 461                 r0 = MP_DIGIT(b,0);
 462         }
 463
 464         MP_ADD_CARRY(a0, r0, r0, 0,     carry);
 465         MP_ADD_CARRY(a1, r1, r1, carry, carry);
 466         MP_ADD_CARRY(a2, r2, r2, carry, carry);
 467         MP_ADD_CARRY(a3, r3, r3, carry, carry);
 468         MP_ADD_CARRY(a4, r4, r4, carry, carry);
 469
 470         MP_CHECKOK(s_mp_pad(r, 5));
 471         MP_DIGIT(r, 4) = r4;
 472         MP_DIGIT(r, 3) = r3;
 473         MP_DIGIT(r, 2) = r2;
 474         MP_DIGIT(r, 1) = r1;
 475         MP_DIGIT(r, 0) = r0;
 476         MP_SIGN(r) = MP_ZPOS;
 477         MP_USED(r) = 5;
 478
 479         /* Do quick 'subract' if we've gone over
 480          * (add the 2's complement of the curve field) */
 481          a4 = MP_DIGIT(&meth->irr,4);
 482         if (carry ||  r4 >  a4 ||
 483                 ((r4 == a4) && mp_cmp(r,&meth->irr) != MP_LT)) {
 484                 a3 = MP_DIGIT(&meth->irr,3);
 485                 a2 = MP_DIGIT(&meth->irr,2);
 486                 a1 = MP_DIGIT(&meth->irr,1);
 487                 a0 = MP_DIGIT(&meth->irr,0);
 488                 MP_SUB_BORROW(r0, a0, r0, 0,     carry);
 489                 MP_SUB_BORROW(r1, a1, r1, carry, carry);
 490                 MP_SUB_BORROW(r2, a2, r2, carry, carry);
 491                 MP_SUB_BORROW(r3, a3, r3, carry, carry);
 492                 MP_SUB_BORROW(r4, a4, r4, carry, carry);
 493                 MP_DIGIT(r, 4) = r4;
 494                 MP_DIGIT(r, 3) = r3;
 495                 MP_DIGIT(r, 2) = r2;
 496                 MP_DIGIT(r, 1) = r1;
 497                 MP_DIGIT(r, 0) = r0;
 498         }
 499
 500         s_mp_clamp(r);
 501
 502   CLEANUP:
 503         return res;
 504 }
 505
 506 /* 6 words */
 507 mp_err
 508 ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
 509                         const GFMethod *meth)
 510 {
 511         mp_err res = MP_OKAY;
 512         mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
 513         mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
 514         mp_digit carry;
 515
 516         switch(MP_USED(a)) {
 517         case 6:
 518                 a5 = MP_DIGIT(a,5);
 519         case 5:
 520                 a4 = MP_DIGIT(a,4);
 521         case 4:
 522                 a3 = MP_DIGIT(a,3);
 523         case 3:
 524                 a2 = MP_DIGIT(a,2);
 525         case 2:
 526                 a1 = MP_DIGIT(a,1);
 527         case 1:
 528                 a0 = MP_DIGIT(a,0);
 529         }
 530         switch(MP_USED(b)) {
 531         case 6:
 532                 r5 = MP_DIGIT(b,5);
 533         case 5:
 534                 r4 = MP_DIGIT(b,4);
 535         case 4:
 536                 r3 = MP_DIGIT(b,3);
 537         case 3:
 538                 r2 = MP_DIGIT(b,2);
 539         case 2:
 540                 r1 = MP_DIGIT(b,1);
 541         case 1:
 542                 r0 = MP_DIGIT(b,0);
 543         }
 544
 545         MP_ADD_CARRY(a0, r0, r0, 0,     carry);
 546         MP_ADD_CARRY(a1, r1, r1, carry, carry);
 547         MP_ADD_CARRY(a2, r2, r2, carry, carry);
 548         MP_ADD_CARRY(a3, r3, r3, carry, carry);
 549         MP_ADD_CARRY(a4, r4, r4, carry, carry);
 550         MP_ADD_CARRY(a5, r5, r5, carry, carry);
 551
 552         MP_CHECKOK(s_mp_pad(r, 6));
 553         MP_DIGIT(r, 5) = r5;
 554         MP_DIGIT(r, 4) = r4;
 555         MP_DIGIT(r, 3) = r3;
 556         MP_DIGIT(r, 2) = r2;
 557         MP_DIGIT(r, 1) = r1;
 558         MP_DIGIT(r, 0) = r0;
 559         MP_SIGN(r) = MP_ZPOS;
 560         MP_USED(r) = 6;
 561
 562         /* Do quick 'subract' if we've gone over
 563          * (add the 2's complement of the curve field) */
 564         a5 = MP_DIGIT(&meth->irr,5);
 565         if (carry ||  r5 >  a5 ||
 566                 ((r5 == a5) && mp_cmp(r,&meth->irr) != MP_LT)) {
 567                 a4 = MP_DIGIT(&meth->irr,4);
 568                 a3 = MP_DIGIT(&meth->irr,3);
 569                 a2 = MP_DIGIT(&meth->irr,2);
 570                 a1 = MP_DIGIT(&meth->irr,1);
 571                 a0 = MP_DIGIT(&meth->irr,0);
 572                 MP_SUB_BORROW(r0, a0, r0, 0,     carry);
 573                 MP_SUB_BORROW(r1, a1, r1, carry, carry);
 574                 MP_SUB_BORROW(r2, a2, r2, carry, carry);
 575                 MP_SUB_BORROW(r3, a3, r3, carry, carry);
 576                 MP_SUB_BORROW(r4, a4, r4, carry, carry);
 577                 MP_SUB_BORROW(r5, a5, r5, carry, carry);
 578                 MP_DIGIT(r, 5) = r5;
 579                 MP_DIGIT(r, 4) = r4;
 580                 MP_DIGIT(r, 3) = r3;
 581                 MP_DIGIT(r, 2) = r2;
 582                 MP_DIGIT(r, 1) = r1;
 583                 MP_DIGIT(r, 0) = r0;
 584         }
 585
 586         s_mp_clamp(r);
 587
 588   CLEANUP:
 589         return res;
 590 }
 591
 592 /*
 593  * The following subraction functions do in-line subractions based
 594  * on our curve size.
 595  *
 596  * ... 3 words
 597  */
 598 mp_err
 599 ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
 600                         const GFMethod *meth)
 601 {
 602         mp_err res = MP_OKAY;
 603         mp_digit b0 = 0, b1 = 0, b2 = 0;
 604         mp_digit r0 = 0, r1 = 0, r2 = 0;
 605         mp_digit borrow;
 606
 607         switch(MP_USED(a)) {
 608         case 3:
 609                 r2 = MP_DIGIT(a,2);
 610         case 2:
 611                 r1 = MP_DIGIT(a,1);
 612         case 1:
 613                 r0 = MP_DIGIT(a,0);
 614         }
 615         switch(MP_USED(b)) {
 616         case 3:
 617                 b2 = MP_DIGIT(b,2);
 618         case 2:
 619                 b1 = MP_DIGIT(b,1);
 620         case 1:
 621                 b0 = MP_DIGIT(b,0);
 622         }
 623
 624 #ifndef MPI_AMD64_ADD
 625         MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
 626         MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
 627         MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
 628 #else
 629         __asm__ (
 630                 "xorq   %3,%3           \n\t"
 631                 "subq   %4,%0           \n\t"
 632                 "sbbq   %5,%1           \n\t"
 633                 "sbbq   %6,%2           \n\t"
 634                 "adcq   $0,%3           \n\t"
 635                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r" (borrow)
 636                 : "r" (b0), "r" (b1), "r" (b2),
 637                   "0" (r0), "1" (r1), "2" (r2)
 638                 : "%cc" );
 639 #endif
 640
 641         /* Do quick 'add' if we've gone under 0
 642          * (subtract the 2's complement of the curve field) */
 643         if (borrow) {
 644                 b2 = MP_DIGIT(&meth->irr,2);
 645                 b1 = MP_DIGIT(&meth->irr,1);
 646                 b0 = MP_DIGIT(&meth->irr,0);
 647 #ifndef MPI_AMD64_ADD
 648                 MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
 649                 MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
 650                 MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
 651 #else
 652                 __asm__ (
 653                         "addq   %3,%0           \n\t"
 654                         "adcq   %4,%1           \n\t"
 655                         "adcq   %5,%2           \n\t"
 656                         : "=r"(r0), "=r"(r1), "=r"(r2)
 657                         : "r" (b0), "r" (b1), "r" (b2),
 658                           "0" (r0), "1" (r1), "2" (r2)
 659                         : "%cc" );
 660 #endif
 661         }
 662
 663 #ifdef MPI_AMD64_ADD
 664         /* compiler fakeout? */
 665         if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
 666                 MP_CHECKOK(s_mp_pad(r, 4));
 667         }
 668 #endif
 669         MP_CHECKOK(s_mp_pad(r, 3));
 670         MP_DIGIT(r, 2) = r2;
 671         MP_DIGIT(r, 1) = r1;
 672         MP_DIGIT(r, 0) = r0;
 673         MP_SIGN(r) = MP_ZPOS;
 674         MP_USED(r) = 3;
 675         s_mp_clamp(r);
 676
 677   CLEANUP:
 678         return res;
 679 }
 680
 681 /* 4 words */
 682 mp_err
 683 ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
 684                         const GFMethod *meth)
 685 {
 686         mp_err res = MP_OKAY;
 687         mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
 688         mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
 689         mp_digit borrow;
 690
 691         switch(MP_USED(a)) {
 692         case 4:
 693                 r3 = MP_DIGIT(a,3);
 694         case 3:
 695                 r2 = MP_DIGIT(a,2);
 696         case 2:
 697                 r1 = MP_DIGIT(a,1);
 698         case 1:
 699                 r0 = MP_DIGIT(a,0);
 700         }
 701         switch(MP_USED(b)) {
 702         case 4:
 703                 b3 = MP_DIGIT(b,3);
 704         case 3:
 705                 b2 = MP_DIGIT(b,2);
 706         case 2:
 707                 b1 = MP_DIGIT(b,1);
 708         case 1:
 709                 b0 = MP_DIGIT(b,0);
 710         }
 711
 712 #ifndef MPI_AMD64_ADD
 713         MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
 714         MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
 715         MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
 716         MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
 717 #else
 718         __asm__ (
 719                 "xorq   %4,%4           \n\t"
 720                 "subq   %5,%0           \n\t"
 721                 "sbbq   %6,%1           \n\t"
 722                 "sbbq   %7,%2           \n\t"
 723                 "sbbq   %8,%3           \n\t"
 724                 "adcq   $0,%4           \n\t"
 725                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r" (borrow)
 726                 : "r" (b0), "r" (b1), "r" (b2), "r" (b3),
 727                   "0" (r0), "1" (r1), "2" (r2), "3" (r3)
 728                 : "%cc" );
 729 #endif
 730
 731         /* Do quick 'add' if we've gone under 0
 732          * (subtract the 2's complement of the curve field) */
 733         if (borrow) {
 734                 b3 = MP_DIGIT(&meth->irr,3);
 735                 b2 = MP_DIGIT(&meth->irr,2);
 736                 b1 = MP_DIGIT(&meth->irr,1);
 737                 b0 = MP_DIGIT(&meth->irr,0);
 738 #ifndef MPI_AMD64_ADD
 739                 MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
 740                 MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
 741                 MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
 742                 MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
 743 #else
 744                 __asm__ (
 745                         "addq   %4,%0           \n\t"
 746                         "adcq   %5,%1           \n\t"
 747                         "adcq   %6,%2           \n\t"
 748                         "adcq   %7,%3           \n\t"
 749                         : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
 750                         : "r" (b0), "r" (b1), "r" (b2), "r" (b3),
 751                           "0" (r0), "1" (r1), "2" (r2), "3" (r3)
 752                         : "%cc" );
 753 #endif
 754         }
 755 #ifdef MPI_AMD64_ADD
 756         /* compiler fakeout? */
 757         if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
 758                 MP_CHECKOK(s_mp_pad(r, 4));
 759         }
 760 #endif
 761         MP_CHECKOK(s_mp_pad(r, 4));
 762         MP_DIGIT(r, 3) = r3;
 763         MP_DIGIT(r, 2) = r2;
 764         MP_DIGIT(r, 1) = r1;
 765         MP_DIGIT(r, 0) = r0;
 766         MP_SIGN(r) = MP_ZPOS;
 767         MP_USED(r) = 4;
 768         s_mp_clamp(r);
 769
 770   CLEANUP:
 771         return res;
 772 }
 773
 774 /* 5 words */
 775 mp_err
 776 ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
 777                         const GFMethod *meth)
 778 {
 779         mp_err res = MP_OKAY;
 780         mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
 781         mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
 782         mp_digit borrow;
 783
 784         switch(MP_USED(a)) {
 785         case 5:
 786                 r4 = MP_DIGIT(a,4);
 787         case 4:
 788                 r3 = MP_DIGIT(a,3);
 789         case 3:
 790                 r2 = MP_DIGIT(a,2);
 791         case 2:
 792                 r1 = MP_DIGIT(a,1);
 793         case 1:
 794                 r0 = MP_DIGIT(a,0);
 795         }
 796         switch(MP_USED(b)) {
 797         case 5:
 798                 b4 = MP_DIGIT(b,4);
 799         case 4:
 800                 b3 = MP_DIGIT(b,3);
 801         case 3:
 802                 b2 = MP_DIGIT(b,2);
 803         case 2:
 804                 b1 = MP_DIGIT(b,1);
 805         case 1:
 806                 b0 = MP_DIGIT(b,0);
 807         }
 808
 809         MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
 810         MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
 811         MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
 812         MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
 813         MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
 814
 815         /* Do quick 'add' if we've gone under 0
 816          * (subtract the 2's complement of the curve field) */
 817         if (borrow) {
 818                 b4 = MP_DIGIT(&meth->irr,4);
 819                 b3 = MP_DIGIT(&meth->irr,3);
 820                 b2 = MP_DIGIT(&meth->irr,2);
 821                 b1 = MP_DIGIT(&meth->irr,1);
 822                 b0 = MP_DIGIT(&meth->irr,0);
 823                 MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
 824                 MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
 825                 MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
 826                 MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
 827         }
 828         MP_CHECKOK(s_mp_pad(r, 5));
 829         MP_DIGIT(r, 4) = r4;
 830         MP_DIGIT(r, 3) = r3;
 831         MP_DIGIT(r, 2) = r2;
 832         MP_DIGIT(r, 1) = r1;
 833         MP_DIGIT(r, 0) = r0;
 834         MP_SIGN(r) = MP_ZPOS;
 835         MP_USED(r) = 5;
 836         s_mp_clamp(r);
 837
 838   CLEANUP:
 839         return res;
 840 }
 841
 842 /* 6 words */
 843 mp_err
 844 ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
 845                         const GFMethod *meth)
 846 {
 847         mp_err res = MP_OKAY;
 848         mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
 849         mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
 850         mp_digit borrow;
 851
 852         switch(MP_USED(a)) {
 853         case 6:
 854                 r5 = MP_DIGIT(a,5);
 855         case 5:
 856                 r4 = MP_DIGIT(a,4);
 857         case 4:
 858                 r3 = MP_DIGIT(a,3);
 859         case 3:
 860                 r2 = MP_DIGIT(a,2);
 861         case 2:
 862                 r1 = MP_DIGIT(a,1);
 863         case 1:
 864                 r0 = MP_DIGIT(a,0);
 865         }
 866         switch(MP_USED(b)) {
 867         case 6:
 868                 b5 = MP_DIGIT(b,5);
 869         case 5:
 870                 b4 = MP_DIGIT(b,4);
 871         case 4:
 872                 b3 = MP_DIGIT(b,3);
 873         case 3:
 874                 b2 = MP_DIGIT(b,2);
 875         case 2:
 876                 b1 = MP_DIGIT(b,1);
 877         case 1:
 878                 b0 = MP_DIGIT(b,0);
 879         }
 880
 881         MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
 882         MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
 883         MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
 884         MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
 885         MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
 886         MP_SUB_BORROW(r5, b5, r5, borrow, borrow);
 887
 888         /* Do quick 'add' if we've gone under 0
 889          * (subtract the 2's complement of the curve field) */
 890         if (borrow) {
 891                 b5 = MP_DIGIT(&meth->irr,5);
 892                 b4 = MP_DIGIT(&meth->irr,4);
 893                 b3 = MP_DIGIT(&meth->irr,3);
 894                 b2 = MP_DIGIT(&meth->irr,2);
 895                 b1 = MP_DIGIT(&meth->irr,1);
 896                 b0 = MP_DIGIT(&meth->irr,0);
 897                 MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
 898                 MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
 899                 MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
 900                 MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
 901                 MP_ADD_CARRY(b4, r4, r4, borrow, borrow);
 902         }
 903
 904         MP_CHECKOK(s_mp_pad(r, 6));
 905         MP_DIGIT(r, 5) = r5;
 906         MP_DIGIT(r, 4) = r4;
 907         MP_DIGIT(r, 3) = r3;
 908         MP_DIGIT(r, 2) = r2;
 909         MP_DIGIT(r, 1) = r1;
 910         MP_DIGIT(r, 0) = r0;
 911         MP_SIGN(r) = MP_ZPOS;
 912         MP_USED(r) = 6;
 913         s_mp_clamp(r);
 914
 915   CLEANUP:
 916         return res;
 917 }
 918
 919
 920 /* Reduces an integer to a field element. */
 921 mp_err
 922 ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
 923 {
 924         return mp_mod(a, &meth->irr, r);
 925 }
 926
 927 /* Multiplies two field elements. */
 928 mp_err
 929 ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
 930                    const GFMethod *meth)
 931 {
 932         return mp_mulmod(a, b, &meth->irr, r);
 933 }
 934
 935 /* Squares a field element. */
 936 mp_err
 937 ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
 938 {
 939         return mp_sqrmod(a, &meth->irr, r);
 940 }
 941
 942 /* Divides two field elements. If a is NULL, then returns the inverse of
 943  * b. */
 944 mp_err
 945 ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
 946                    const GFMethod *meth)
 947 {
 948         mp_err res = MP_OKAY;
 949         mp_int t;
 950
 951         /* If a is NULL, then return the inverse of b, otherwise return a/b. */
 952         if (a == NULL) {
 953                 return mp_invmod(b, &meth->irr, r);
 954         } else {
 955                 /* MPI doesn't support divmod, so we implement it using invmod and
 956                  * mulmod. */
 957                 MP_CHECKOK(mp_init(&t));
 958                 MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
 959                 MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r));
 960           CLEANUP:
 961                 mp_clear(&t);
 962                 return res;
 963         }
 964 }
 965
 966 /* Wrapper functions for generic binary polynomial field arithmetic. */
 967
 968 /* Adds two field elements. */
 969 mp_err
 970 ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
 971                         const GFMethod *meth)
 972 {
 973         return mp_badd(a, b, r);
 974 }
 975
 976 /* Negates a field element. Note that for binary polynomial fields, the
 977  * negation of a field element is the field element itself. */
 978 mp_err
 979 ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
 980 {
 981         if (a == r) {
 982                 return MP_OKAY;
 983         } else {
 984                 return mp_copy(a, r);
 985         }
 986 }
 987
 988 /* Reduces a binary polynomial to a field element. */
 989 mp_err
 990 ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
 991 {
 992         return mp_bmod(a, meth->irr_arr, r);
 993 }
 994
 995 /* Multiplies two field elements. */
 996 mp_err
 997 ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
 998                         const GFMethod *meth)
 999 {
1000         return mp_bmulmod(a, b, meth->irr_arr, r);
1001 }
1002
1003 /* Squares a field element. */
1004 mp_err
1005 ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
1006 {
1007         return mp_bsqrmod(a, meth->irr_arr, r);
1008 }
1009
1010 /* Divides two field elements. If a is NULL, then returns the inverse of
1011  * b. */
1012 mp_err
1013 ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
1014                         const GFMethod *meth)
1015 {
1016         mp_err res = MP_OKAY;
1017         mp_int t;
1018
1019         /* If a is NULL, then return the inverse of b, otherwise return a/b. */
1020         if (a == NULL) {
1021                 /* The GF(2^m) portion of MPI doesn't support invmod, so we
1022                  * compute 1/b. */
1023                 MP_CHECKOK(mp_init(&t));
1024                 MP_CHECKOK(mp_set_int(&t, 1));
1025                 MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r));
1026           CLEANUP:
1027                 mp_clear(&t);
1028                 return res;
1029         } else {
1030                 return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r);
1031         }
1032 }