usr/src/common/crypto/ecc/ecp_192.c

   1 /*
   2  * ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is the elliptic curve math library for prime field curves.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Sun Microsystems, Inc.
  19  * Portions created by the Initial Developer are Copyright (C) 2003
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
  24  *
  25  * Alternatively, the contents of this file may be used under the terms of
  26  * either the GNU General Public License Version 2 or later (the "GPL"), or
  27  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  28  * in which case the provisions of the GPL or the LGPL are applicable instead
  29  * of those above. If you wish to allow use of your version of this file only
  30  * under the terms of either the GPL or the LGPL, and not to allow others to
  31  * use your version of this file under the terms of the MPL, indicate your
  32  * decision by deleting the provisions above and replace them with the notice
  33  * and other provisions required by the GPL or the LGPL. If you do not delete
  34  * the provisions above, a recipient may use your version of this file under
  35  * the terms of any one of the MPL, the GPL or the LGPL.
  36  *
  37  * ***** END LICENSE BLOCK ***** */
  38 /*
  39  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  40  * Use is subject to license terms.
  41  *
  42  * Sun elects to use this software under the MPL license.
  43  */
  44
  45 #pragma ident   "%Z%%M% %I%     %E% SMI"
  46
  47 #include "ecp.h"
  48 #include "mpi.h"
  49 #include "mplogic.h"
  50 #include "mpi-priv.h"
  51 #ifndef _KERNEL
  52 #include <stdlib.h>
  53 #endif
  54
  55 #define ECP192_DIGITS ECL_CURVE_DIGITS(192)
  56
  57 /*
     * Fast modular reduction for p192 = 2^192 - 2^64 - 1.  a can be r.
  58  * Uses algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
  59  * Implementation of the NIST Elliptic Curves over Prime Fields.
     *
     *   a    - value to reduce
     *   r    - receives the reduced value
     *   meth - only meth->irr is read, as the modulus for the mp_mod()
     *          fallback
     *
     * Inputs with fewer than ECP192_DIGITS digits are copied to r as they
     * are; inputs with more than 2 * ECP192_DIGITS digits are handed to
     * mp_mod().  Everything in between takes the in-line path, which folds
     * the upper digits into the lower ones and finishes with at most one
     * subtraction of p.
     *
     * Returns MP_OKAY, or the failure reported by mp_copy(), mp_mod() or
     * s_mp_pad().
     */
  60 mp_err
  61 ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
  62 {
  63         mp_err res = MP_OKAY;
  64         mp_size a_used = MP_USED(a);
  65         mp_digit r3 = 0;        /* carry word above the 192-bit sum */
  66 #ifndef MPI_AMD64_ADD
  67         mp_digit carry;
  68 #endif
  69 #ifdef ECL_THIRTY_TWO_BIT
  70         mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
  71         mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
  72 #else
  73         mp_digit a5 = 0, a4 = 0, a3 = 0;
  74         mp_digit r0, r1, r2;
  75 #endif
  76
  77         /* reduction not needed if a has fewer digits than the field */
  78         if (a_used < ECP192_DIGITS) {
  79                 if (a == r) {
  80                         return MP_OKAY;
  81                 }
  82                 return mp_copy(a, r);
  83         }
  84
  85         /* for polynomials larger than twice the field size, use regular
  86          * reduction */
  87         if (a_used > ECP192_DIGITS*2) {
  88                 MP_CHECKOK(mp_mod(a, &meth->irr, r));
  89         } else {
  90                 /* copy out upper words of a */
  91
  92 #ifdef ECL_THIRTY_TWO_BIT
  93
  94                 /* in all the math below,
  95                  * nXb is most significant, nXa is least significant.
                      * The cases fall through on purpose: every digit a
                      * actually has, from the top one down, is picked up. */
  96                 switch (a_used) {
  97                 case 12:
  98                         a5b = MP_DIGIT(a, 11);
  99                 case 11:
 100                         a5a = MP_DIGIT(a, 10);
 101                 case 10:
 102                         a4b = MP_DIGIT(a, 9);
 103                 case 9:
 104                         a4a = MP_DIGIT(a, 8);
 105                 case 8:
 106                         a3b = MP_DIGIT(a, 7);
 107                 case 7:
 108                         a3a = MP_DIGIT(a, 6);
 109                 }
 110
 111
 112                 r2b= MP_DIGIT(a, 5);
 113                 r2a= MP_DIGIT(a, 4);
 114                 r1b = MP_DIGIT(a, 3);
 115                 r1a = MP_DIGIT(a, 2);
 116                 r0b = MP_DIGIT(a, 1);
 117                 r0a = MP_DIGIT(a, 0);
 118
 119                 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
 120                 MP_ADD_CARRY(r0a, a3a, r0a, 0,    carry);
 121                 MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
 122                 MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
 123                 MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
 124                 MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
 125                 MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
 126                 r3 = carry; carry = 0;
 127                 MP_ADD_CARRY(r0a, a5a, r0a, 0,     carry);
 128                 MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
 129                 MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
 130                 MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
 131                 MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
 132                 MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
 133                 r3 += carry;
 134                 MP_ADD_CARRY(r1a, a4a, r1a, 0,     carry);
 135                 MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
 136                 MP_ADD_CARRY(r2a,   0, r2a, carry, carry);
 137                 MP_ADD_CARRY(r2b,   0, r2b, carry, carry);
 138                 r3 += carry;
 139
 140                 /* reduce out the carry: r3 * 2^192 == r3 * (2^64 + 1) mod p */
 141                 while (r3) {
 142                         MP_ADD_CARRY(r0a, r3, r0a, 0,     carry);
 143                         MP_ADD_CARRY(r0b,  0, r0b, carry, carry);
 144                         MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
 145                         MP_ADD_CARRY(r1b,  0, r1b, carry, carry);
 146                         MP_ADD_CARRY(r2a,  0, r2a, carry, carry);
 147                         MP_ADD_CARRY(r2b,  0, r2b, carry, carry);
 148                         r3 = carry;
 149                 }
 150
 151                 /* check for final reduction */
 152                 /*
 153                  * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
 154                  * 0xffffffffffffffff (64-bit words, most significant
 155                  * first). That means we can only be over and need one
 156                  * more reduction
 157                  *  if r2 == 0xffffffffffffffff
 158                  *     and
 159                  *     r1 == 0xffffffffffffffff   or
 160                  *     r1 == 0xfffffffffffffffe and r0 == 0xffffffffffffffff
 161                  * In all cases, we subtract the field (or add the 2's
 162                  * complement value 2^64 + 1).  The difference is at most
                      * 2^64, reached only when every word is all ones, so
                      * r1 ends up 1 in that single case and 0 otherwise.
                      */
 163                 if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
 164                         && (r1b == 0xffffffff) ) &&
 165                            ((r1a == 0xffffffff) ||
 166                             ((r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
 167                                         (r0b == 0xffffffff))) ) {
 168                         /* do a quick subtract, keeping the carry out of r0 */
                             r1a = ((r1a == 0xffffffff) && (r0a == 0xffffffff) &&
                                     (r0b == 0xffffffff)) ? 1 : 0;
 169                         MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
 170                         r0b += carry;
 171                         r1b = r2a = r2b = 0;
 172                 }
 173
 174                 /* set the lower words of r */
 175                 if (a != r) {
 176                         MP_CHECKOK(s_mp_pad(r, 6));
 177                 }
 178                 MP_DIGIT(r, 5) = r2b;
 179                 MP_DIGIT(r, 4) = r2a;
 180                 MP_DIGIT(r, 3) = r1b;
 181                 MP_DIGIT(r, 2) = r1a;
 182                 MP_DIGIT(r, 1) = r0b;
 183                 MP_DIGIT(r, 0) = r0a;
 184                 MP_USED(r) = 6;
 185 #else
                     /* cases fall through on purpose, top digit downwards */
 186                 switch (a_used) {
 187                 case 6:
 188                         a5 = MP_DIGIT(a, 5);
 189                 case 5:
 190                         a4 = MP_DIGIT(a, 4);
 191                 case 4:
 192                         a3 = MP_DIGIT(a, 3);
 193                 }
 194
 195                 r2 = MP_DIGIT(a, 2);
 196                 r1 = MP_DIGIT(a, 1);
 197                 r0 = MP_DIGIT(a, 0);
 198
 199                 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
 200 #ifndef MPI_AMD64_ADD
 201                 MP_ADD_CARRY(r0, a3, r0, 0,     carry);
 202                 MP_ADD_CARRY(r1, a3, r1, carry, carry);
 203                 MP_ADD_CARRY(r2, a4, r2, carry, carry);
 204                 r3 = carry;
 205                 MP_ADD_CARRY(r0, a5, r0, 0,     carry);
 206                 MP_ADD_CARRY(r1, a5, r1, carry, carry);
 207                 MP_ADD_CARRY(r2, a5, r2, carry, carry);
 208                 r3 += carry;
 209                 MP_ADD_CARRY(r1, a4, r1, 0,     carry);
 210                 MP_ADD_CARRY(r2,  0, r2, carry, carry);
 211                 r3 += carry;
 212
 213 #else
 214                 r2 = MP_DIGIT(a, 2);
 215                 r1 = MP_DIGIT(a, 1);
 216                 r0 = MP_DIGIT(a, 0);
 217
 218                 /* same three additions as above; r3 collects the carries */
 219                 __asm__ (
 220                 "xorq   %3,%3           \n\t"
 221                 "addq   %4,%0           \n\t"
 222                 "adcq   %4,%1           \n\t"
 223                 "adcq   %5,%2           \n\t"
 224                 "adcq   $0,%3           \n\t"
 225                 "addq   %6,%0           \n\t"
 226                 "adcq   %6,%1           \n\t"
 227                 "adcq   %6,%2           \n\t"
 228                 "adcq   $0,%3           \n\t"
 229                 "addq   %5,%1           \n\t"
 230                 "adcq   $0,%2           \n\t"
 231                 "adcq   $0,%3           \n\t"
 232                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
 233                   "=r"(a4), "=r"(a5)
 234                 : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
 235                   "4" (a3), "5" (a4), "6"(a5)
 236                 : "%cc" );
 237 #endif
 238
 239                 /* reduce out the carry: r3 * 2^192 == r3 * (2^64 + 1) mod p */
 240                 while (r3) {
 241 #ifndef MPI_AMD64_ADD
 242                         MP_ADD_CARRY(r0, r3, r0, 0,     carry);
 243                         MP_ADD_CARRY(r1, r3, r1, carry, carry);
 244                         MP_ADD_CARRY(r2,  0, r2, carry, carry);
 245                         r3 = carry;
 246 #else
 247                         a3=r3;
 248                         __asm__ (
 249                         "xorq   %3,%3           \n\t"
 250                         "addq   %4,%0           \n\t"
 251                         "adcq   %4,%1           \n\t"
 252                         "adcq   $0,%2           \n\t"
 253                         "adcq   $0,%3           \n\t"
 254                         : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
 255                         : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
 256                         : "%cc" );
 257 #endif
 258                 }
 259
 260                 /* check for final reduction */
 261                 /*
 262                  * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
 263                  * 0xffffffffffffffff (most significant word first). That
 264                  * means we can only be over and need one more reduction
 265                  *  if r2 == 0xffffffffffffffff (MP_DIGIT_MAX)
 266                  *     and
 267                  *     r1 == 0xffffffffffffffff   or
 268                  *     r1 == 0xfffffffffffffffe and r0 == 0xffffffffffffffff
 269                  * In all cases, we subtract the field (or add the 2's
 270                  * complement value 2^64 + 1).  The loop above has left r3
                      * at zero, so only the three result words are tested.
 271                  */
 272                 if ((r2 == MP_DIGIT_MAX) &&
 273                       ((r1 == MP_DIGIT_MAX) ||
 274                         ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX)))) {
 275                         /*
                              * do a quick subtract.  The difference is r0 + 1
                              * when r1 is all ones and 0 when the value equals
                              * p, so r1 must take the carry out of r0++ in the
                              * one case where r0 and r1 are both all ones.
                              */
                             r1 = ((r1 == MP_DIGIT_MAX) &&
                                 (r0 == MP_DIGIT_MAX)) ? 1 : 0;
 276                         r0++;
 277                         r2 = 0;
 278                 }
 279                 /* set the lower words of r */
 280                 if (a != r) {
 281                         MP_CHECKOK(s_mp_pad(r, 3));
 282                 }
 283                 MP_DIGIT(r, 2) = r2;
 284                 MP_DIGIT(r, 1) = r1;
 285                 MP_DIGIT(r, 0) = r0;
 286                 MP_USED(r) = 3;
 287 #endif
                     /*
                      * A fixed digit count was stored above; normalize it the
                      * same way ec_GFp_nistp192_add/sub do after storing
                      * their three digits.
                      */
                     s_mp_clamp(r);
 288         }
 289
 290   CLEANUP:
 291         return res;
 292 }
 293
 294 #ifndef ECL_THIRTY_TWO_BIT
 295 /*
 296  * Compute the sum of two p192 field elements, r = a + b mod p192.  The
 297  * work is done in-line since the number of words is so small that we
 298  * don't want the overhead of mp function calls.
     *
     *   a, b - addends; digits are read only when MP_USED() is 1, 2 or 3,
     *          any other digit count leaves that operand at zero
     *   r    - receives the sum as three digits, marked MP_ZPOS and then
     *          clamped
     *   meth - not referenced by this routine
     *
     * Returns MP_OKAY, or the failure reported by s_mp_pad().
     */
 299 mp_err
 300 ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
 301                         const GFMethod *meth)
 302 {
 303         mp_err res = MP_OKAY;
 304         mp_digit a0 = 0, a1 = 0, a2 = 0;
 305         mp_digit r0 = 0, r1 = 0, r2 = 0;
 306         mp_digit carry;
 307
             /* cases fall through on purpose, top digit downwards */
 308         switch(MP_USED(a)) {
 309         case 3:
 310                 a2 = MP_DIGIT(a,2);
 311         case 2:
 312                 a1 = MP_DIGIT(a,1);
 313         case 1:
 314                 a0 = MP_DIGIT(a,0);
 315         }
             /* b is loaded straight into the accumulator words */
 316         switch(MP_USED(b)) {
 317         case 3:
 318                 r2 = MP_DIGIT(b,2);
 319         case 2:
 320                 r1 = MP_DIGIT(b,1);
 321         case 1:
 322                 r0 = MP_DIGIT(b,0);
 323         }
 324
 325 #ifndef MPI_AMD64_ADD
 326         MP_ADD_CARRY(a0, r0, r0, 0,     carry);
 327         MP_ADD_CARRY(a1, r1, r1, carry, carry);
 328         MP_ADD_CARRY(a2, r2, r2, carry, carry);
 329 #else
             /*
              * carry (%3) is zeroed by the first instruction, before a0..a2
              * (%4..%6) have been read, so it is declared early-clobber
              * ("=&r") to keep the compiler from placing it in one of the
              * input registers.
              */
 330         __asm__ (
 331                 "xorq   %3,%3           \n\t"
 332                 "addq   %4,%0           \n\t"
 333                 "adcq   %5,%1           \n\t"
 334                 "adcq   %6,%2           \n\t"
 335                 "adcq   $0,%3           \n\t"
 336                 : "=r"(r0), "=r"(r1), "=r"(r2), "=&r"(carry)
 337                 : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
 338                   "1" (r1), "2" (r2)
 339                 : "%cc" );
 340 #endif
 341
 342         /* Do quick 'subtract' if we've gone over
 343          * (add the 2's complement of the curve field, 2^64 + 1) */
 344         if (carry || ((r2 == MP_DIGIT_MAX) &&
 345                       ((r1 == MP_DIGIT_MAX) ||
 346                         ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
 347 #ifndef MPI_AMD64_ADD
 348                 MP_ADD_CARRY(r0, 1, r0, 0,     carry);
 349                 MP_ADD_CARRY(r1, 1, r1, carry, carry);
 350                 MP_ADD_CARRY(r2, 0, r2, carry, carry);
 351 #else
 352                 __asm__ (
 353                         "addq   $1,%0           \n\t"
 354                         "adcq   $1,%1           \n\t"
 355                         "adcq   $0,%2           \n\t"
 356                         : "=r"(r0), "=r"(r1), "=r"(r2)
 357                         : "0" (r0), "1" (r1), "2" (r2)
 358                         : "%cc" );
 359 #endif
 360         }
 361
 362
             /* store the three result words and drop leading zero digits */
 363         MP_CHECKOK(s_mp_pad(r, 3));
 364         MP_DIGIT(r, 2) = r2;
 365         MP_DIGIT(r, 1) = r1;
 366         MP_DIGIT(r, 0) = r0;
 367         MP_SIGN(r) = MP_ZPOS;
 368         MP_USED(r) = 3;
 369         s_mp_clamp(r);
 370
 371
 372   CLEANUP:
 373         return res;
 374 }
 375
 376 /*
 377  * Compute the difference of two p192 field elements, r = a - b mod p192.
 378  * The work is done in-line since the number of words is so small that we
 379  * don't want the overhead of mp function calls.
     *
     *   a, b - minuend and subtrahend; digits are read only when MP_USED()
     *          is 1, 2 or 3, any other digit count leaves that operand at
     *          zero
     *   r    - receives the difference as three digits, marked MP_ZPOS and
     *          then clamped
     *   meth - not referenced by this routine
     *
     * Returns MP_OKAY, or the failure reported by s_mp_pad().
     */
 380 mp_err
 381 ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
 382                         const GFMethod *meth)
 383 {
 384         mp_err res = MP_OKAY;
 385         mp_digit b0 = 0, b1 = 0, b2 = 0;
 386         mp_digit r0 = 0, r1 = 0, r2 = 0;
 387         mp_digit borrow;
 388
             /* a is loaded straight into the accumulator words; the cases
              * fall through on purpose, top digit downwards */
 389         switch(MP_USED(a)) {
 390         case 3:
 391                 r2 = MP_DIGIT(a,2);
 392         case 2:
 393                 r1 = MP_DIGIT(a,1);
 394         case 1:
 395                 r0 = MP_DIGIT(a,0);
 396         }
 397
 398         switch(MP_USED(b)) {
 399         case 3:
 400                 b2 = MP_DIGIT(b,2);
 401         case 2:
 402                 b1 = MP_DIGIT(b,1);
 403         case 1:
 404                 b0 = MP_DIGIT(b,0);
 405         }
 406
 407 #ifndef MPI_AMD64_ADD
 408         MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
 409         MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
 410         MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
 411 #else
             /*
              * borrow (%3) is zeroed by the first instruction, before b0..b2
              * (%4..%6) have been read, so it is declared early-clobber
              * ("=&r") to keep the compiler from placing it in one of the
              * input registers.
              */
 412         __asm__ (
 413                 "xorq   %3,%3           \n\t"
 414                 "subq   %4,%0           \n\t"
 415                 "sbbq   %5,%1           \n\t"
 416                 "sbbq   %6,%2           \n\t"
 417                 "adcq   $0,%3           \n\t"
 418                 : "=r"(r0), "=r"(r1), "=r"(r2), "=&r"(borrow)
 419                 : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
 420                   "1" (r1), "2" (r2)
 421                 : "%cc" );
 422 #endif
 423
 424         /* Do quick 'add' if we've gone under 0
 425          * (subtract the 2's complement of the curve field, 2^64 + 1) */
 426         if (borrow) {
 427 #ifndef MPI_AMD64_ADD
 428                 MP_SUB_BORROW(r0, 1, r0, 0,     borrow);
 429                 MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
 430                 MP_SUB_BORROW(r2,  0, r2, borrow, borrow);
 431 #else
 432                 __asm__ (
 433                         "subq   $1,%0           \n\t"
 434                         "sbbq   $1,%1           \n\t"
 435                         "sbbq   $0,%2           \n\t"
 436                         : "=r"(r0), "=r"(r1), "=r"(r2)
 437                         : "0" (r0), "1" (r1), "2" (r2)
 438                         : "%cc" );
 439 #endif
 440         }
 441
             /* store the three result words and drop leading zero digits */
 442         MP_CHECKOK(s_mp_pad(r, 3));
 443         MP_DIGIT(r, 2) = r2;
 444         MP_DIGIT(r, 1) = r1;
 445         MP_DIGIT(r, 0) = r0;
 446         MP_SIGN(r) = MP_ZPOS;
 447         MP_USED(r) = 3;
 448         s_mp_clamp(r);
 449
 450   CLEANUP:
 451         return res;
 452 }
 453
 454 #endif
 455
 456 /*
 457  * Field squaring for p192: r = a^2 mod p192.  The full square is formed
 458  * with mp_sqr() and then reduced in place by ec_GFp_nistp192_mod().
     * r could be a.  A failing mp_sqr() status is returned as is; otherwise
     * the status of the reduction is returned.
     */
 459 mp_err
 460 ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
 461 {
 462         mp_err res = mp_sqr(a, r);
 463
 464         if (res < MP_OKAY) {
 465                 return res;
 466         }
 467         return ec_GFp_nistp192_mod(r, r, meth);
 468 }
 469
 470 /*
 471  * Field multiplication for p192: r = a * b mod p192.  The product is
 472  * formed with mp_mul() and reduced in place by ec_GFp_nistp192_mod().
     * r could be a or b; a could be b.  A failing mp_mul() status is
     * returned as is; otherwise the status of the reduction is returned.
     */
 473 mp_err
 474 ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
 475     const GFMethod *meth)
 476 {
 477         mp_err res;
 478
 479         /* skip the reduction when the multiply itself failed */
 480         res = mp_mul(a, b, r);
 481         return (res < MP_OKAY) ? res :
 482             ec_GFp_nistp192_mod(r, r, meth);
 483 }
 484
 485 /*
 486  * Field division for p192: r = a / b mod p192, or r = 1 / b when a is
     * NULL.  The modulus is taken from meth->irr.  Returns the status of
     * the first step that fails, otherwise the status of the last step run.
     */
 487 mp_err
 488 ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
 489     const GFMethod *meth)
 490 {
 491         mp_err res;
 492         mp_int t;
 493
 494         /* No numerator: the caller only wants the inverse of b. */
 495         if (a == NULL) {
 496                 return mp_invmod(b, &meth->irr, r);
 497         }
 498
 499         /*
 500          * MPI has no divmod, so compute t = 1/b, then r = a * t, then
 501          * reduce.  The && chain stops at the first failing call.
 502          */
 503         if ((res = mp_init(&t, FLAG(b))) >= MP_OKAY &&
 504             (res = mp_invmod(b, &meth->irr, &t)) >= MP_OKAY &&
 505             (res = mp_mul(a, &t, r)) >= MP_OKAY) {
 506                 res = ec_GFp_nistp192_mod(r, r, meth);
 507         }
             /* the temporary is cleared on every path that reaches here */
             mp_clear(&t);
             return res;
 508 }
 509
 510 /*
 511  * Install the p192-specific field routines on group->meth when name is
     * ECCurve_NIST_P192; any other curve name leaves the group untouched.
     * The add and sub routines are installed only when ECL_THIRTY_TWO_BIT
     * is not defined.  Always returns MP_OKAY.
     */
 512 mp_err
 513 ec_group_set_gfp192(ECGroup *group, ECCurveName name)
 514 {
 515         if (name != ECCurve_NIST_P192) {
 516                 return MP_OKAY;
 517         }
 518
 519 #ifndef ECL_THIRTY_TWO_BIT
 520         group->meth->field_add = ec_GFp_nistp192_add;
 521         group->meth->field_sub = ec_GFp_nistp192_sub;
 522 #endif
 523         group->meth->field_mod = ec_GFp_nistp192_mod;
 524         group->meth->field_mul = ec_GFp_nistp192_mul;
 525         group->meth->field_sqr = ec_GFp_nistp192_sqr;
             group->meth->field_div = ec_GFp_nistp192_div;
             return MP_OKAY;
 526 }