Import from 1.9a8 tarball
[mozilla-nss.git] / security / nss / lib / freebl / ecl / ecp_192.c
blobf4cd42bc3bf72906fff106b6cc08183dd28ce12c
1 /*
2 * ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is the elliptic curve math library for prime field curves.
17 * The Initial Developer of the Original Code is
18 * Sun Microsystems, Inc.
19 * Portions created by the Initial Developer are Copyright (C) 2003
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include "ecp.h"
40 #include "mpi.h"
41 #include "mplogic.h"
42 #include "mpi-priv.h"
43 #include <stdlib.h>
45 #define ECP192_DIGITS ECL_CURVE_DIGITS(192)
47 /* Fast modular reduction for p192 = 2^192 - 2^64 - 1. a can be r. Uses
48 * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
49 * Implementation of the NIST Elliptic Curves over Prime Fields. */
50 mp_err
51 ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
53 mp_err res = MP_OKAY;
54 mp_size a_used = MP_USED(a);
55 mp_digit r3;
56 #ifndef MPI_AMD64_ADD
57 mp_digit carry;
58 #endif
59 #ifdef ECL_THIRTY_TWO_BIT
60 mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
61 mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
62 #else
63 mp_digit a5 = 0, a4 = 0, a3 = 0;
64 mp_digit r0, r1, r2;
65 #endif
67 /* reduction not needed if a is not larger than field size */
68 if (a_used < ECP192_DIGITS) {
69 if (a == r) {
70 return MP_OKAY;
72 return mp_copy(a, r);
75 /* for polynomials larger than twice the field size, use regular
76 * reduction */
77 if (a_used > ECP192_DIGITS*2) {
78 MP_CHECKOK(mp_mod(a, &meth->irr, r));
79 } else {
80 /* copy out upper words of a */
82 #ifdef ECL_THIRTY_TWO_BIT
84 /* in all the math below,
85 * nXb is most signifiant, nXa is least significant */
86 switch (a_used) {
87 case 12:
88 a5b = MP_DIGIT(a, 11);
89 case 11:
90 a5a = MP_DIGIT(a, 10);
91 case 10:
92 a4b = MP_DIGIT(a, 9);
93 case 9:
94 a4a = MP_DIGIT(a, 8);
95 case 8:
96 a3b = MP_DIGIT(a, 7);
97 case 7:
98 a3a = MP_DIGIT(a, 6);
102 r2b= MP_DIGIT(a, 5);
103 r2a= MP_DIGIT(a, 4);
104 r1b = MP_DIGIT(a, 3);
105 r1a = MP_DIGIT(a, 2);
106 r0b = MP_DIGIT(a, 1);
107 r0a = MP_DIGIT(a, 0);
109 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
110 MP_ADD_CARRY(r0a, a3a, r0a, 0, carry);
111 MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
112 MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
113 MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
114 MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
115 MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
116 r3 = carry; carry = 0;
117 MP_ADD_CARRY(r0a, a5a, r0a, 0, carry);
118 MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
119 MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
120 MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
121 MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
122 MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
123 r3 += carry;
124 MP_ADD_CARRY(r1a, a4a, r1a, 0, carry);
125 MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
126 MP_ADD_CARRY(r2a, 0, r2a, carry, carry);
127 MP_ADD_CARRY(r2b, 0, r2b, carry, carry);
128 r3 += carry;
130 /* reduce out the carry */
131 while (r3) {
132 MP_ADD_CARRY(r0a, r3, r0a, 0, carry);
133 MP_ADD_CARRY(r0b, 0, r0b, carry, carry);
134 MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
135 MP_ADD_CARRY(r1b, 0, r1b, carry, carry);
136 MP_ADD_CARRY(r2a, 0, r2a, carry, carry);
137 MP_ADD_CARRY(r2b, 0, r2b, carry, carry);
138 r3 = carry;
141 /* check for final reduction */
143 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
144 * 0xffffffffffffffff. That means we can only be over and need
145 * one more reduction
146 * if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
147 * and
148 * r1 == 0xffffffffffffffffff or
149 * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
150 * In all cases, we subtract the field (or add the 2's
151 * complement value (1,1,0)). (r0, r1, r2)
153 if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
154 && (r1b == 0xffffffff) ) &&
155 ((r1a == 0xffffffff) ||
156 (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
157 (r0b == 0xffffffff)) ) {
158 /* do a quick subtract */
159 MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
160 r0b += carry;
161 r1a = r1b = r2a = r2b = 0;
164 /* set the lower words of r */
165 if (a != r) {
166 MP_CHECKOK(s_mp_pad(r, 6));
168 MP_DIGIT(r, 5) = r2b;
169 MP_DIGIT(r, 4) = r2a;
170 MP_DIGIT(r, 3) = r1b;
171 MP_DIGIT(r, 2) = r1a;
172 MP_DIGIT(r, 1) = r0b;
173 MP_DIGIT(r, 0) = r0a;
174 MP_USED(r) = 6;
175 #else
176 switch (a_used) {
177 case 6:
178 a5 = MP_DIGIT(a, 5);
179 case 5:
180 a4 = MP_DIGIT(a, 4);
181 case 4:
182 a3 = MP_DIGIT(a, 3);
185 r2 = MP_DIGIT(a, 2);
186 r1 = MP_DIGIT(a, 1);
187 r0 = MP_DIGIT(a, 0);
189 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
190 #ifndef MPI_AMD64_ADD
191 MP_ADD_CARRY(r0, a3, r0, 0, carry);
192 MP_ADD_CARRY(r1, a3, r1, carry, carry);
193 MP_ADD_CARRY(r2, a4, r2, carry, carry);
194 r3 = carry;
195 MP_ADD_CARRY(r0, a5, r0, 0, carry);
196 MP_ADD_CARRY(r1, a5, r1, carry, carry);
197 MP_ADD_CARRY(r2, a5, r2, carry, carry);
198 r3 += carry;
199 MP_ADD_CARRY(r1, a4, r1, 0, carry);
200 MP_ADD_CARRY(r2, 0, r2, carry, carry);
201 r3 += carry;
203 #else
204 r2 = MP_DIGIT(a, 2);
205 r1 = MP_DIGIT(a, 1);
206 r0 = MP_DIGIT(a, 0);
208 /* set the lower words of r */
209 __asm__ (
210 "xorq %3,%3 \n\t"
211 "addq %4,%0 \n\t"
212 "adcq %4,%1 \n\t"
213 "adcq %5,%2 \n\t"
214 "adcq $0,%3 \n\t"
215 "addq %6,%0 \n\t"
216 "adcq %6,%1 \n\t"
217 "adcq %6,%2 \n\t"
218 "adcq $0,%3 \n\t"
219 "addq %5,%1 \n\t"
220 "adcq $0,%2 \n\t"
221 "adcq $0,%3 \n\t"
222 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
223 "=r"(a4), "=r"(a5)
224 : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
225 "4" (a3), "5" (a4), "6"(a5)
226 : "%cc" );
227 #endif
229 /* reduce out the carry */
230 while (r3) {
231 #ifndef MPI_AMD64_ADD
232 MP_ADD_CARRY(r0, r3, r0, 0, carry);
233 MP_ADD_CARRY(r1, r3, r1, carry, carry);
234 MP_ADD_CARRY(r2, 0, r2, carry, carry);
235 r3 = carry;
236 #else
237 a3=r3;
238 __asm__ (
239 "xorq %3,%3 \n\t"
240 "addq %4,%0 \n\t"
241 "adcq %4,%1 \n\t"
242 "adcq $0,%2 \n\t"
243 "adcq $0,%3 \n\t"
244 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
245 : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
246 : "%cc" );
247 #endif
250 /* check for final reduction */
252 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
253 * 0xffffffffffffffff. That means we can only be over and need
254 * one more reduction
255 * if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
256 * and
257 * r1 == 0xffffffffffffffffff or
258 * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
259 * In all cases, we subtract the field (or add the 2's
260 * complement value (1,1,0)). (r0, r1, r2)
262 if (r3 || ((r2 == MP_DIGIT_MAX) &&
263 ((r1 == MP_DIGIT_MAX) ||
264 ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
265 /* do a quick subtract */
266 r0++;
267 r1 = r2 = 0;
269 /* set the lower words of r */
270 if (a != r) {
271 MP_CHECKOK(s_mp_pad(r, 3));
273 MP_DIGIT(r, 2) = r2;
274 MP_DIGIT(r, 1) = r1;
275 MP_DIGIT(r, 0) = r0;
276 MP_USED(r) = 3;
277 #endif
280 CLEANUP:
281 return res;
284 #ifndef ECL_THIRTY_TWO_BIT
285 /* Compute the sum of 192 bit curves. Do the work in-line since the
286 * number of words are so small, we don't want to overhead of mp function
287 * calls. Uses optimized modular reduction for p192.
289 mp_err
290 ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
291 const GFMethod *meth)
293 mp_err res = MP_OKAY;
294 mp_digit a0 = 0, a1 = 0, a2 = 0;
295 mp_digit r0 = 0, r1 = 0, r2 = 0;
296 mp_digit carry;
298 switch(MP_USED(a)) {
299 case 3:
300 a2 = MP_DIGIT(a,2);
301 case 2:
302 a1 = MP_DIGIT(a,1);
303 case 1:
304 a0 = MP_DIGIT(a,0);
306 switch(MP_USED(b)) {
307 case 3:
308 r2 = MP_DIGIT(b,2);
309 case 2:
310 r1 = MP_DIGIT(b,1);
311 case 1:
312 r0 = MP_DIGIT(b,0);
315 #ifndef MPI_AMD64_ADD
316 MP_ADD_CARRY(a0, r0, r0, 0, carry);
317 MP_ADD_CARRY(a1, r1, r1, carry, carry);
318 MP_ADD_CARRY(a2, r2, r2, carry, carry);
319 #else
320 __asm__ (
321 "xorq %3,%3 \n\t"
322 "addq %4,%0 \n\t"
323 "adcq %5,%1 \n\t"
324 "adcq %6,%2 \n\t"
325 "adcq $0,%3 \n\t"
326 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
327 : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
328 "1" (r1), "2" (r2)
329 : "%cc" );
330 #endif
332 /* Do quick 'subract' if we've gone over
333 * (add the 2's complement of the curve field) */
334 if (carry || ((r2 == MP_DIGIT_MAX) &&
335 ((r1 == MP_DIGIT_MAX) ||
336 ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
337 #ifndef MPI_AMD64_ADD
338 MP_ADD_CARRY(r0, 1, r0, 0, carry);
339 MP_ADD_CARRY(r1, 1, r1, carry, carry);
340 MP_ADD_CARRY(r2, 0, r2, carry, carry);
341 #else
342 __asm__ (
343 "addq $1,%0 \n\t"
344 "adcq $1,%1 \n\t"
345 "adcq $0,%2 \n\t"
346 : "=r"(r0), "=r"(r1), "=r"(r2)
347 : "0" (r0), "1" (r1), "2" (r2)
348 : "%cc" );
349 #endif
353 MP_CHECKOK(s_mp_pad(r, 3));
354 MP_DIGIT(r, 2) = r2;
355 MP_DIGIT(r, 1) = r1;
356 MP_DIGIT(r, 0) = r0;
357 MP_SIGN(r) = MP_ZPOS;
358 MP_USED(r) = 3;
359 s_mp_clamp(r);
362 CLEANUP:
363 return res;
366 /* Compute the diff of 192 bit curves. Do the work in-line since the
367 * number of words are so small, we don't want to overhead of mp function
368 * calls. Uses optimized modular reduction for p192.
370 mp_err
371 ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
372 const GFMethod *meth)
374 mp_err res = MP_OKAY;
375 mp_digit b0 = 0, b1 = 0, b2 = 0;
376 mp_digit r0 = 0, r1 = 0, r2 = 0;
377 mp_digit borrow;
379 switch(MP_USED(a)) {
380 case 3:
381 r2 = MP_DIGIT(a,2);
382 case 2:
383 r1 = MP_DIGIT(a,1);
384 case 1:
385 r0 = MP_DIGIT(a,0);
388 switch(MP_USED(b)) {
389 case 3:
390 b2 = MP_DIGIT(b,2);
391 case 2:
392 b1 = MP_DIGIT(b,1);
393 case 1:
394 b0 = MP_DIGIT(b,0);
397 #ifndef MPI_AMD64_ADD
398 MP_SUB_BORROW(r0, b0, r0, 0, borrow);
399 MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
400 MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
401 #else
402 __asm__ (
403 "xorq %3,%3 \n\t"
404 "subq %4,%0 \n\t"
405 "sbbq %5,%1 \n\t"
406 "sbbq %6,%2 \n\t"
407 "adcq $0,%3 \n\t"
408 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
409 : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
410 "1" (r1), "2" (r2)
411 : "%cc" );
412 #endif
414 /* Do quick 'add' if we've gone under 0
415 * (subtract the 2's complement of the curve field) */
416 if (borrow) {
417 #ifndef MPI_AMD64_ADD
418 MP_SUB_BORROW(r0, 1, r0, 0, borrow);
419 MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
420 MP_SUB_BORROW(r2, 0, r2, borrow, borrow);
421 #else
422 __asm__ (
423 "subq $1,%0 \n\t"
424 "sbbq $1,%1 \n\t"
425 "sbbq $0,%2 \n\t"
426 : "=r"(r0), "=r"(r1), "=r"(r2)
427 : "0" (r0), "1" (r1), "2" (r2)
428 : "%cc" );
429 #endif
432 MP_CHECKOK(s_mp_pad(r, 3));
433 MP_DIGIT(r, 2) = r2;
434 MP_DIGIT(r, 1) = r1;
435 MP_DIGIT(r, 0) = r0;
436 MP_SIGN(r) = MP_ZPOS;
437 MP_USED(r) = 3;
438 s_mp_clamp(r);
440 CLEANUP:
441 return res;
444 #endif
446 /* Compute the square of polynomial a, reduce modulo p192. Store the
447 * result in r. r could be a. Uses optimized modular reduction for p192.
449 mp_err
450 ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
452 mp_err res = MP_OKAY;
454 MP_CHECKOK(mp_sqr(a, r));
455 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
456 CLEANUP:
457 return res;
460 /* Compute the product of two polynomials a and b, reduce modulo p192.
461 * Store the result in r. r could be a or b; a could be b. Uses
462 * optimized modular reduction for p192. */
463 mp_err
464 ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
465 const GFMethod *meth)
467 mp_err res = MP_OKAY;
469 MP_CHECKOK(mp_mul(a, b, r));
470 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
471 CLEANUP:
472 return res;
475 /* Divides two field elements. If a is NULL, then returns the inverse of
476 * b. */
477 mp_err
478 ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
479 const GFMethod *meth)
481 mp_err res = MP_OKAY;
482 mp_int t;
484 /* If a is NULL, then return the inverse of b, otherwise return a/b. */
485 if (a == NULL) {
486 return mp_invmod(b, &meth->irr, r);
487 } else {
488 /* MPI doesn't support divmod, so we implement it using invmod and
489 * mulmod. */
490 MP_CHECKOK(mp_init(&t));
491 MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
492 MP_CHECKOK(mp_mul(a, &t, r));
493 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
494 CLEANUP:
495 mp_clear(&t);
496 return res;
500 /* Wire in fast field arithmetic and precomputation of base point for
501 * named curves. */
502 mp_err
503 ec_group_set_gfp192(ECGroup *group, ECCurveName name)
505 if (name == ECCurve_NIST_P192) {
506 group->meth->field_mod = &ec_GFp_nistp192_mod;
507 group->meth->field_mul = &ec_GFp_nistp192_mul;
508 group->meth->field_sqr = &ec_GFp_nistp192_sqr;
509 group->meth->field_div = &ec_GFp_nistp192_div;
510 #ifndef ECL_THIRTY_TWO_BIT
511 group->meth->field_add = &ec_GFp_nistp192_add;
512 group->meth->field_sub = &ec_GFp_nistp192_sub;
513 #endif
515 return MP_OKAY;