security/nss/lib/freebl/ecl/ecp_192.c

   1 /*
   2  * ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is the elliptic curve math library for prime field curves.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Sun Microsystems, Inc.
  19  * Portions created by the Initial Developer are Copyright (C) 2003
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
  24  *
  25  * Alternatively, the contents of this file may be used under the terms of
  26  * either the GNU General Public License Version 2 or later (the "GPL"), or
  27  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  28  * in which case the provisions of the GPL or the LGPL are applicable instead
  29  * of those above. If you wish to allow use of your version of this file only
  30  * under the terms of either the GPL or the LGPL, and not to allow others to
  31  * use your version of this file under the terms of the MPL, indicate your
  32  * decision by deleting the provisions above and replace them with the notice
  33  * and other provisions required by the GPL or the LGPL. If you do not delete
  34  * the provisions above, a recipient may use your version of this file under
  35  * the terms of any one of the MPL, the GPL or the LGPL.
  36  *
  37  * ***** END LICENSE BLOCK ***** */
  38
  39 #include "ecp.h"
  40 #include "mpi.h"
  41 #include "mplogic.h"
  42 #include "mpi-priv.h"
  43 #include <stdlib.h>
  44
  45 #define ECP192_DIGITS ECL_CURVE_DIGITS(192)
  46
  47 /* Fast modular reduction for p192 = 2^192 - 2^64 - 1.  a can be r. Uses
  48  * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
  49  * Implementation of the NIST Elliptic Curves over Prime Fields. */
  50 mp_err
  51 ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
  52 {
  53         mp_err res = MP_OKAY;
  54         mp_size a_used = MP_USED(a);
  55         mp_digit r3;
  56 #ifndef MPI_AMD64_ADD
  57         mp_digit carry;
  58 #endif
  59 #ifdef ECL_THIRTY_TWO_BIT
  60         mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
  61         mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
  62 #else
  63         mp_digit a5 = 0, a4 = 0, a3 = 0;
  64         mp_digit r0, r1, r2;
  65 #endif
  66
  67         /* reduction not needed if a is not larger than field size */
  68         if (a_used < ECP192_DIGITS) {
  69                 if (a == r) {
  70                         return MP_OKAY;
  71                 }
  72                 return mp_copy(a, r);
  73         }
  74
  75         /* for polynomials larger than twice the field size, use regular
  76          * reduction */
  77         if (a_used > ECP192_DIGITS*2) {
  78                 MP_CHECKOK(mp_mod(a, &meth->irr, r));
  79         } else {
  80                 /* copy out upper words of a */
  81
  82 #ifdef ECL_THIRTY_TWO_BIT
  83
  84                 /* in all the math below,
  85                  * nXb is most signifiant, nXa is least significant */
  86                 switch (a_used) {
  87                 case 12:
  88                         a5b = MP_DIGIT(a, 11);
  89                 case 11:
  90                         a5a = MP_DIGIT(a, 10);
  91                 case 10:
  92                         a4b = MP_DIGIT(a, 9);
  93                 case 9:
  94                         a4a = MP_DIGIT(a, 8);
  95                 case 8:
  96                         a3b = MP_DIGIT(a, 7);
  97                 case 7:
  98                         a3a = MP_DIGIT(a, 6);
  99                 }
 100
 101
 102                 r2b= MP_DIGIT(a, 5);
 103                 r2a= MP_DIGIT(a, 4);
 104                 r1b = MP_DIGIT(a, 3);
 105                 r1a = MP_DIGIT(a, 2);
 106                 r0b = MP_DIGIT(a, 1);
 107                 r0a = MP_DIGIT(a, 0);
 108
 109                 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
 110                 MP_ADD_CARRY(r0a, a3a, r0a, 0,    carry);
 111                 MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
 112                 MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
 113                 MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
 114                 MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
 115                 MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
 116                 r3 = carry; carry = 0;
 117                 MP_ADD_CARRY(r0a, a5a, r0a, 0,     carry);
 118                 MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
 119                 MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
 120                 MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
 121                 MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
 122                 MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
 123                 r3 += carry;
 124                 MP_ADD_CARRY(r1a, a4a, r1a, 0,     carry);
 125                 MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
 126                 MP_ADD_CARRY(r2a,   0, r2a, carry, carry);
 127                 MP_ADD_CARRY(r2b,   0, r2b, carry, carry);
 128                 r3 += carry;
 129
 130                 /* reduce out the carry */
 131                 while (r3) {
 132                         MP_ADD_CARRY(r0a, r3, r0a, 0,     carry);
 133                         MP_ADD_CARRY(r0b,  0, r0b, carry, carry);
 134                         MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
 135                         MP_ADD_CARRY(r1b,  0, r1b, carry, carry);
 136                         MP_ADD_CARRY(r2a,  0, r2a, carry, carry);
 137                         MP_ADD_CARRY(r2b,  0, r2b, carry, carry);
 138                         r3 = carry;
 139                 }
 140
 141                 /* check for final reduction */
 142                 /*
 143                  * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
 144                  * 0xffffffffffffffff. That means we can only be over and need
 145                  * one more reduction
 146                  *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
 147                  *     and
 148                  *     r1 == 0xffffffffffffffffff   or
 149                  *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
 150                  * In all cases, we subtract the field (or add the 2's
 151                  * complement value (1,1,0)).  (r0, r1, r2)
 152                  */
 153                 if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
 154                         && (r1b == 0xffffffff) ) &&
 155                            ((r1a == 0xffffffff) ||
 156                             (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
 157                                         (r0b == 0xffffffff)) ) {
 158                         /* do a quick subtract */
 159                         MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
 160                         r0b += carry;
 161                         r1a = r1b = r2a = r2b = 0;
 162                 }
 163
 164                 /* set the lower words of r */
 165                 if (a != r) {
 166                         MP_CHECKOK(s_mp_pad(r, 6));
 167                 }
 168                 MP_DIGIT(r, 5) = r2b;
 169                 MP_DIGIT(r, 4) = r2a;
 170                 MP_DIGIT(r, 3) = r1b;
 171                 MP_DIGIT(r, 2) = r1a;
 172                 MP_DIGIT(r, 1) = r0b;
 173                 MP_DIGIT(r, 0) = r0a;
 174                 MP_USED(r) = 6;
 175 #else
 176                 switch (a_used) {
 177                 case 6:
 178                         a5 = MP_DIGIT(a, 5);
 179                 case 5:
 180                         a4 = MP_DIGIT(a, 4);
 181                 case 4:
 182                         a3 = MP_DIGIT(a, 3);
 183                 }
 184
 185                 r2 = MP_DIGIT(a, 2);
 186                 r1 = MP_DIGIT(a, 1);
 187                 r0 = MP_DIGIT(a, 0);
 188
 189                 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
 190 #ifndef MPI_AMD64_ADD
 191                 MP_ADD_CARRY(r0, a3, r0, 0,     carry);
 192                 MP_ADD_CARRY(r1, a3, r1, carry, carry);
 193                 MP_ADD_CARRY(r2, a4, r2, carry, carry);
 194                 r3 = carry;
 195                 MP_ADD_CARRY(r0, a5, r0, 0,     carry);
 196                 MP_ADD_CARRY(r1, a5, r1, carry, carry);
 197                 MP_ADD_CARRY(r2, a5, r2, carry, carry);
 198                 r3 += carry;
 199                 MP_ADD_CARRY(r1, a4, r1, 0,     carry);
 200                 MP_ADD_CARRY(r2,  0, r2, carry, carry);
 201                 r3 += carry;
 202
 203 #else
 204                 r2 = MP_DIGIT(a, 2);
 205                 r1 = MP_DIGIT(a, 1);
 206                 r0 = MP_DIGIT(a, 0);
 207
 208                 /* set the lower words of r */
 209                 __asm__ (
 210                 "xorq   %3,%3           \n\t"
 211                 "addq   %4,%0           \n\t"
 212                 "adcq   %4,%1           \n\t"
 213                 "adcq   %5,%2           \n\t"
 214                 "adcq   $0,%3           \n\t"
 215                 "addq   %6,%0           \n\t"
 216                 "adcq   %6,%1           \n\t"
 217                 "adcq   %6,%2           \n\t"
 218                 "adcq   $0,%3           \n\t"
 219                 "addq   %5,%1           \n\t"
 220                 "adcq   $0,%2           \n\t"
 221                 "adcq   $0,%3           \n\t"
 222                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
 223                   "=r"(a4), "=r"(a5)
 224                 : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
 225                   "4" (a3), "5" (a4), "6"(a5)
 226                 : "%cc" );
 227 #endif
 228
 229                 /* reduce out the carry */
 230                 while (r3) {
 231 #ifndef MPI_AMD64_ADD
 232                         MP_ADD_CARRY(r0, r3, r0, 0,     carry);
 233                         MP_ADD_CARRY(r1, r3, r1, carry, carry);
 234                         MP_ADD_CARRY(r2,  0, r2, carry, carry);
 235                         r3 = carry;
 236 #else
 237                         a3=r3;
 238                         __asm__ (
 239                         "xorq   %3,%3           \n\t"
 240                         "addq   %4,%0           \n\t"
 241                         "adcq   %4,%1           \n\t"
 242                         "adcq   $0,%2           \n\t"
 243                         "adcq   $0,%3           \n\t"
 244                         : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
 245                         : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
 246                         : "%cc" );
 247 #endif
 248                 }
 249
 250                 /* check for final reduction */
 251                 /*
 252                  * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
 253                  * 0xffffffffffffffff. That means we can only be over and need
 254                  * one more reduction
 255                  *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
 256                  *     and
 257                  *     r1 == 0xffffffffffffffffff   or
 258                  *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
 259                  * In all cases, we subtract the field (or add the 2's
 260                  * complement value (1,1,0)).  (r0, r1, r2)
 261                  */
 262                 if (r3 || ((r2 == MP_DIGIT_MAX) &&
 263                       ((r1 == MP_DIGIT_MAX) ||
 264                         ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
 265                         /* do a quick subtract */
 266                         r0++;
 267                         r1 = r2 = 0;
 268                 }
 269                 /* set the lower words of r */
 270                 if (a != r) {
 271                         MP_CHECKOK(s_mp_pad(r, 3));
 272                 }
 273                 MP_DIGIT(r, 2) = r2;
 274                 MP_DIGIT(r, 1) = r1;
 275                 MP_DIGIT(r, 0) = r0;
 276                 MP_USED(r) = 3;
 277 #endif
 278         }
 279
 280   CLEANUP:
 281         return res;
 282 }
 283
 284 #ifndef ECL_THIRTY_TWO_BIT
 285 /* Compute the sum of 192 bit curves. Do the work in-line since the
 286  * number of words are so small, we don't want to overhead of mp function
 287  * calls.  Uses optimized modular reduction for p192.
 288  */
 289 mp_err
 290 ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
 291                         const GFMethod *meth)
 292 {
 293         mp_err res = MP_OKAY;
 294         mp_digit a0 = 0, a1 = 0, a2 = 0;
 295         mp_digit r0 = 0, r1 = 0, r2 = 0;
 296         mp_digit carry;
 297
 298         switch(MP_USED(a)) {
 299         case 3:
 300                 a2 = MP_DIGIT(a,2);
 301         case 2:
 302                 a1 = MP_DIGIT(a,1);
 303         case 1:
 304                 a0 = MP_DIGIT(a,0);
 305         }
 306         switch(MP_USED(b)) {
 307         case 3:
 308                 r2 = MP_DIGIT(b,2);
 309         case 2:
 310                 r1 = MP_DIGIT(b,1);
 311         case 1:
 312                 r0 = MP_DIGIT(b,0);
 313         }
 314
 315 #ifndef MPI_AMD64_ADD
 316         MP_ADD_CARRY(a0, r0, r0, 0,     carry);
 317         MP_ADD_CARRY(a1, r1, r1, carry, carry);
 318         MP_ADD_CARRY(a2, r2, r2, carry, carry);
 319 #else
 320         __asm__ (
 321                 "xorq   %3,%3           \n\t"
 322                 "addq   %4,%0           \n\t"
 323                 "adcq   %5,%1           \n\t"
 324                 "adcq   %6,%2           \n\t"
 325                 "adcq   $0,%3           \n\t"
 326                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
 327                 : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
 328                   "1" (r1), "2" (r2)
 329                 : "%cc" );
 330 #endif
 331
 332         /* Do quick 'subract' if we've gone over
 333          * (add the 2's complement of the curve field) */
 334         if (carry || ((r2 == MP_DIGIT_MAX) &&
 335                       ((r1 == MP_DIGIT_MAX) ||
 336                         ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
 337 #ifndef MPI_AMD64_ADD
 338                 MP_ADD_CARRY(r0, 1, r0, 0,     carry);
 339                 MP_ADD_CARRY(r1, 1, r1, carry, carry);
 340                 MP_ADD_CARRY(r2, 0, r2, carry, carry);
 341 #else
 342                 __asm__ (
 343                         "addq   $1,%0           \n\t"
 344                         "adcq   $1,%1           \n\t"
 345                         "adcq   $0,%2           \n\t"
 346                         : "=r"(r0), "=r"(r1), "=r"(r2)
 347                         : "0" (r0), "1" (r1), "2" (r2)
 348                         : "%cc" );
 349 #endif
 350         }
 351
 352
 353         MP_CHECKOK(s_mp_pad(r, 3));
 354         MP_DIGIT(r, 2) = r2;
 355         MP_DIGIT(r, 1) = r1;
 356         MP_DIGIT(r, 0) = r0;
 357         MP_SIGN(r) = MP_ZPOS;
 358         MP_USED(r) = 3;
 359         s_mp_clamp(r);
 360
 361
 362   CLEANUP:
 363         return res;
 364 }
 365
 366 /* Compute the diff of 192 bit curves. Do the work in-line since the
 367  * number of words are so small, we don't want to overhead of mp function
 368  * calls.  Uses optimized modular reduction for p192.
 369  */
 370 mp_err
 371 ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
 372                         const GFMethod *meth)
 373 {
 374         mp_err res = MP_OKAY;
 375         mp_digit b0 = 0, b1 = 0, b2 = 0;
 376         mp_digit r0 = 0, r1 = 0, r2 = 0;
 377         mp_digit borrow;
 378
 379         switch(MP_USED(a)) {
 380         case 3:
 381                 r2 = MP_DIGIT(a,2);
 382         case 2:
 383                 r1 = MP_DIGIT(a,1);
 384         case 1:
 385                 r0 = MP_DIGIT(a,0);
 386         }
 387
 388         switch(MP_USED(b)) {
 389         case 3:
 390                 b2 = MP_DIGIT(b,2);
 391         case 2:
 392                 b1 = MP_DIGIT(b,1);
 393         case 1:
 394                 b0 = MP_DIGIT(b,0);
 395         }
 396
 397 #ifndef MPI_AMD64_ADD
 398         MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
 399         MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
 400         MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
 401 #else
 402         __asm__ (
 403                 "xorq   %3,%3           \n\t"
 404                 "subq   %4,%0           \n\t"
 405                 "sbbq   %5,%1           \n\t"
 406                 "sbbq   %6,%2           \n\t"
 407                 "adcq   $0,%3           \n\t"
 408                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
 409                 : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
 410                   "1" (r1), "2" (r2)
 411                 : "%cc" );
 412 #endif
 413
 414         /* Do quick 'add' if we've gone under 0
 415          * (subtract the 2's complement of the curve field) */
 416         if (borrow) {
 417 #ifndef MPI_AMD64_ADD
 418                 MP_SUB_BORROW(r0, 1, r0, 0,     borrow);
 419                 MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
 420                 MP_SUB_BORROW(r2,  0, r2, borrow, borrow);
 421 #else
 422                 __asm__ (
 423                         "subq   $1,%0           \n\t"
 424                         "sbbq   $1,%1           \n\t"
 425                         "sbbq   $0,%2           \n\t"
 426                         : "=r"(r0), "=r"(r1), "=r"(r2)
 427                         : "0" (r0), "1" (r1), "2" (r2)
 428                         : "%cc" );
 429 #endif
 430         }
 431
 432         MP_CHECKOK(s_mp_pad(r, 3));
 433         MP_DIGIT(r, 2) = r2;
 434         MP_DIGIT(r, 1) = r1;
 435         MP_DIGIT(r, 0) = r0;
 436         MP_SIGN(r) = MP_ZPOS;
 437         MP_USED(r) = 3;
 438         s_mp_clamp(r);
 439
 440   CLEANUP:
 441         return res;
 442 }
 443
 444 #endif
 445
 446 /* Compute the square of polynomial a, reduce modulo p192. Store the
 447  * result in r.  r could be a.  Uses optimized modular reduction for p192.
 448  */
 449 mp_err
 450 ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
 451 {
 452         mp_err res = MP_OKAY;
 453
 454         MP_CHECKOK(mp_sqr(a, r));
 455         MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
 456   CLEANUP:
 457         return res;
 458 }
 459
 460 /* Compute the product of two polynomials a and b, reduce modulo p192.
 461  * Store the result in r.  r could be a or b; a could be b.  Uses
 462  * optimized modular reduction for p192. */
 463 mp_err
 464 ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
 465                                         const GFMethod *meth)
 466 {
 467         mp_err res = MP_OKAY;
 468
 469         MP_CHECKOK(mp_mul(a, b, r));
 470         MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
 471   CLEANUP:
 472         return res;
 473 }
 474
 475 /* Divides two field elements. If a is NULL, then returns the inverse of
 476  * b. */
 477 mp_err
 478 ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
 479                    const GFMethod *meth)
 480 {
 481         mp_err res = MP_OKAY;
 482         mp_int t;
 483
 484         /* If a is NULL, then return the inverse of b, otherwise return a/b. */
 485         if (a == NULL) {
 486                 return  mp_invmod(b, &meth->irr, r);
 487         } else {
 488                 /* MPI doesn't support divmod, so we implement it using invmod and
 489                  * mulmod. */
 490                 MP_CHECKOK(mp_init(&t));
 491                 MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
 492                 MP_CHECKOK(mp_mul(a, &t, r));
 493                 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
 494           CLEANUP:
 495                 mp_clear(&t);
 496                 return res;
 497         }
 498 }
 499
 500 /* Wire in fast field arithmetic and precomputation of base point for
 501  * named curves. */
 502 mp_err
 503 ec_group_set_gfp192(ECGroup *group, ECCurveName name)
 504 {
 505         if (name == ECCurve_NIST_P192) {
 506                 group->meth->field_mod = &ec_GFp_nistp192_mod;
 507                 group->meth->field_mul = &ec_GFp_nistp192_mul;
 508                 group->meth->field_sqr = &ec_GFp_nistp192_sqr;
 509                 group->meth->field_div = &ec_GFp_nistp192_div;
 510 #ifndef ECL_THIRTY_TWO_BIT
 511                 group->meth->field_add = &ec_GFp_nistp192_add;
 512                 group->meth->field_sub = &ec_GFp_nistp192_sub;
 513 #endif
 514         }
 515         return MP_OKAY;
 516 }