1 /* crypto/ec/ecp_nistp521.c */
2 /*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
29 #include <openssl/opensslconf.h>
30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
32 #ifndef OPENSSL_SYS_VMS
33 #include <stdint.h>
34 #else
35 #include <inttypes.h>
36 #endif
38 #include <string.h>
39 #include <openssl/err.h>
40 #include "ec_lcl.h"
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 #else
46 #error "Need GCC 3.1 or later to define type uint128_t"
47 #endif
49 typedef uint8_t u8;
50 typedef uint64_t u64;
51 typedef int64_t s64;
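/* Everything below leans on this 128-bit type: the product of two limbs of
 * 58 or more bits does not fit in 64 bits, so it is accumulated in a
 * uint128_t. A minimal standalone sketch of that idea (hypothetical test
 * snippet, not part of this file; assumes a 64-bit gcc/clang target and the
 * typedefs above): */

#include <stdio.h>

static void show_limb_product(void)
	{
	uint64_t a = (1ULL << 58) - 1;		/* a full 58-bit limb */
	uint64_t b = (1ULL << 58) - 1;
	uint128_t p = (uint128_t) a * b;	/* 116-bit product, cannot overflow */
	printf("high=%016llx low=%016llx\n",
		(unsigned long long) (p >> 64), (unsigned long long) p);
	}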
53 /* The underlying field.
55 * P521 operates over GF(2^521-1). We can serialise an element of this field
56 * into 66 bytes where the most significant byte contains only a single bit. We
57 * call this an felem_bytearray. */
59 typedef u8 felem_bytearray[66];
61 /* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
62 * These values are big-endian. */
63 static const felem_bytearray nistp521_curve_params[5] =
65 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff},
74 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
82 0xff, 0xfc},
83 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
84 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
85 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
86 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
87 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
88 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
89 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
90 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
91 0x3f, 0x00},
92 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
93 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
94 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
95 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
96 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
97 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
98 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
99 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
100 0xbd, 0x66},
101 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
102 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
103 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
104 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
105 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
106 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
107 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
108 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
109 0x66, 0x50}
112 /* The representation of field elements.
113 * ------------------------------------
115 * We represent field elements with nine values. These values are either 64 or
116 * 128 bits and the field element represented is:
117 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
118 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
119 * 58 bits apart, but are greater than 58 bits in length, the most significant
120 * bits of each limb overlap with the least significant bits of the next.
122 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
123 * 'largefelem' */
125 #define NLIMBS 9
127 typedef uint64_t limb;
128 typedef limb felem[NLIMBS];
129 typedef uint128_t largefelem[NLIMBS];
131 static const limb bottom57bits = 0x1ffffffffffffff;
132 static const limb bottom58bits = 0x3ffffffffffffff;
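/* As a cross-check of this representation: the integer value of an felem is
 * v[0]*2^0 + v[1]*2^58 + ... + v[8]*2^464 reduced mod p. A hypothetical test
 * helper (not part of this file; assumes GMP is available and that unsigned
 * long is 64 bits) that rebuilds that value so it can be compared against
 * independent bignum arithmetic: */

#include <gmp.h>

static void felem_to_mpz(mpz_t out, const limb in[NLIMBS])	/* out must already be mpz_init'd */
	{
	mpz_t t, p;
	unsigned i;
	mpz_init(t);
	mpz_init_set_ui(p, 1);
	mpz_mul_2exp(p, p, 521);	/* p = 2^521 */
	mpz_sub_ui(p, p, 1);		/* p = 2^521 - 1 */
	mpz_set_ui(out, 0);
	for (i = 0; i < NLIMBS; i++)
		{
		mpz_set_ui(t, in[i]);
		mpz_mul_2exp(t, t, 58 * i);	/* t = in[i] * 2^(58*i) */
		mpz_add(out, out, t);
		}
	mpz_mod(out, out, p);
	mpz_clear(t);
	mpz_clear(p);
	}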
134 /* bin66_to_felem takes a little-endian byte array and converts it into felem
135 * form. This assumes that the CPU is little-endian. */
136 static void bin66_to_felem(felem out, const u8 in[66])
138 out[0] = (*((limb*) &in[0])) & bottom58bits;
139 out[1] = (*((limb*) &in[7]) >> 2) & bottom58bits;
140 out[2] = (*((limb*) &in[14]) >> 4) & bottom58bits;
141 out[3] = (*((limb*) &in[21]) >> 6) & bottom58bits;
142 out[4] = (*((limb*) &in[29])) & bottom58bits;
143 out[5] = (*((limb*) &in[36]) >> 2) & bottom58bits;
144 out[6] = (*((limb*) &in[43]) >> 4) & bottom58bits;
145 out[7] = (*((limb*) &in[50]) >> 6) & bottom58bits;
146 out[8] = (*((limb*) &in[58])) & bottom57bits;
149 /* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
150 * array. This assumes that the CPU is little-endian. */
151 static void felem_to_bin66(u8 out[66], const felem in)
153 memset(out, 0, 66);
154 (*((limb*) &out[0])) = in[0];
155 (*((limb*) &out[7])) |= in[1] << 2;
156 (*((limb*) &out[14])) |= in[2] << 4;
157 (*((limb*) &out[21])) |= in[3] << 6;
158 (*((limb*) &out[29])) = in[4];
159 (*((limb*) &out[36])) |= in[5] << 2;
160 (*((limb*) &out[43])) |= in[6] << 4;
161 (*((limb*) &out[50])) |= in[7] << 6;
162 (*((limb*) &out[58])) = in[8];
165 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
166 static void flip_endian(u8 *out, const u8 *in, unsigned len)
168 unsigned i;
169 for (i = 0; i < len; ++i)
170 out[i] = in[len-1-i];
173 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
174 static int BN_to_felem(felem out, const BIGNUM *bn)
176 felem_bytearray b_in;
177 felem_bytearray b_out;
178 unsigned num_bytes;
180 /* BN_bn2bin eats leading zeroes */
181 memset(b_out, 0, sizeof b_out);
182 num_bytes = BN_num_bytes(bn);
183 if (num_bytes > sizeof b_out)
185 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
186 return 0;
188 if (BN_is_negative(bn))
190 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
191 return 0;
193 num_bytes = BN_bn2bin(bn, b_in);
194 flip_endian(b_out, b_in, num_bytes);
195 bin66_to_felem(out, b_out);
196 return 1;
199 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
200 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
202 felem_bytearray b_in, b_out;
203 felem_to_bin66(b_in, in);
204 flip_endian(b_out, b_in, sizeof b_out);
205 return BN_bin2bn(b_out, sizeof b_out, out);
209 /* Field operations
210 * ---------------- */
212 static void felem_one(felem out)
214 out[0] = 1;
215 out[1] = 0;
216 out[2] = 0;
217 out[3] = 0;
218 out[4] = 0;
219 out[5] = 0;
220 out[6] = 0;
221 out[7] = 0;
222 out[8] = 0;
225 static void felem_assign(felem out, const felem in)
227 out[0] = in[0];
228 out[1] = in[1];
229 out[2] = in[2];
230 out[3] = in[3];
231 out[4] = in[4];
232 out[5] = in[5];
233 out[6] = in[6];
234 out[7] = in[7];
235 out[8] = in[8];
238 /* felem_sum64 sets out = out + in. */
239 static void felem_sum64(felem out, const felem in)
241 out[0] += in[0];
242 out[1] += in[1];
243 out[2] += in[2];
244 out[3] += in[3];
245 out[4] += in[4];
246 out[5] += in[5];
247 out[6] += in[6];
248 out[7] += in[7];
249 out[8] += in[8];
252 /* felem_scalar sets out = in * scalar */
253 static void felem_scalar(felem out, const felem in, limb scalar)
255 out[0] = in[0] * scalar;
256 out[1] = in[1] * scalar;
257 out[2] = in[2] * scalar;
258 out[3] = in[3] * scalar;
259 out[4] = in[4] * scalar;
260 out[5] = in[5] * scalar;
261 out[6] = in[6] * scalar;
262 out[7] = in[7] * scalar;
263 out[8] = in[8] * scalar;
266 /* felem_scalar64 sets out = out * scalar */
267 static void felem_scalar64(felem out, limb scalar)
269 out[0] *= scalar;
270 out[1] *= scalar;
271 out[2] *= scalar;
272 out[3] *= scalar;
273 out[4] *= scalar;
274 out[5] *= scalar;
275 out[6] *= scalar;
276 out[7] *= scalar;
277 out[8] *= scalar;
280 /* felem_scalar128 sets out = out * scalar */
281 static void felem_scalar128(largefelem out, limb scalar)
283 out[0] *= scalar;
284 out[1] *= scalar;
285 out[2] *= scalar;
286 out[3] *= scalar;
287 out[4] *= scalar;
288 out[5] *= scalar;
289 out[6] *= scalar;
290 out[7] *= scalar;
291 out[8] *= scalar;
294 /* felem_neg sets |out| to |-in|
295 * On entry:
296 * in[i] < 2^59 + 2^14
297 * On exit:
298 * out[i] < 2^62
300 static void felem_neg(felem out, const felem in)
302 /* In order to prevent underflow, we subtract from 0 mod p. */
303 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
304 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
306 out[0] = two62m3 - in[0];
307 out[1] = two62m2 - in[1];
308 out[2] = two62m2 - in[2];
309 out[3] = two62m2 - in[3];
310 out[4] = two62m2 - in[4];
311 out[5] = two62m2 - in[5];
312 out[6] = two62m2 - in[6];
313 out[7] = two62m2 - in[7];
314 out[8] = two62m2 - in[8];
317 /* felem_diff64 subtracts |in| from |out|
318 * On entry:
319 * in[i] < 2^59 + 2^14
320 * On exit:
321 * out[i] < out[i] + 2^62
323 static void felem_diff64(felem out, const felem in)
325 /* In order to prevent underflow, we add 0 mod p before subtracting. */
326 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
327 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
329 out[0] += two62m3 - in[0];
330 out[1] += two62m2 - in[1];
331 out[2] += two62m2 - in[2];
332 out[3] += two62m2 - in[3];
333 out[4] += two62m2 - in[4];
334 out[5] += two62m2 - in[5];
335 out[6] += two62m2 - in[6];
336 out[7] += two62m2 - in[7];
337 out[8] += two62m2 - in[8];
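/* Why adding these constants leaves the value unchanged mod p: read back
 * through the limb weights, the vector used in felem_neg and felem_diff64 is
 *	(2^62 - 2^5) + sum_{i=1..8} (2^62 - 2^4) * 2^(58*i)
 *	= 2^526 - 2^5
 *	= 2^5 * (2^521 - 1)
 *	= 32*p,
 * i.e. a multiple of p, and each per-limb constant is large enough to
 * dominate any in[i] within the stated bounds, so no limb can underflow. */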
340 /* felem_diff_128_64 subtracts |in| from |out|
341 * On entry:
342 * in[i] < 2^62 + 2^17
343 * On exit:
344 * out[i] < out[i] + 2^63
346 static void felem_diff_128_64(largefelem out, const felem in)
348 /* In order to prevent underflow, we add 0 mod p before subtracting. */
349 static const limb two63m6 = (((limb)1) << 63) - (((limb)1) << 6);
350 static const limb two63m5 = (((limb)1) << 63) - (((limb)1) << 5);
352 out[0] += two63m6 - in[0];
353 out[1] += two63m5 - in[1];
354 out[2] += two63m5 - in[2];
355 out[3] += two63m5 - in[3];
356 out[4] += two63m5 - in[4];
357 out[5] += two63m5 - in[5];
358 out[6] += two63m5 - in[6];
359 out[7] += two63m5 - in[7];
360 out[8] += two63m5 - in[8];
363 /* felem_diff128 subtracts |in| from |out|
364 * On entry:
365 * in[i] < 2^126
366 * On exit:
367 * out[i] < out[i] + 2^127 - 2^69
369 static void felem_diff128(largefelem out, const largefelem in)
371 /* In order to prevent underflow, we add 0 mod p before subtracting. */
372 static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70);
373 static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69);
375 out[0] += (two127m70 - in[0]);
376 out[1] += (two127m69 - in[1]);
377 out[2] += (two127m69 - in[2]);
378 out[3] += (two127m69 - in[3]);
379 out[4] += (two127m69 - in[4]);
380 out[5] += (two127m69 - in[5]);
381 out[6] += (two127m69 - in[6]);
382 out[7] += (two127m69 - in[7]);
383 out[8] += (two127m69 - in[8]);
386 /* felem_square sets |out| = |in|^2
387 * On entry:
388 * in[i] < 2^62
389 * On exit:
390 * out[i] < 17 * max(in[i]) * max(in[i])
392 static void felem_square(largefelem out, const felem in)
394 felem inx2, inx4;
395 felem_scalar(inx2, in, 2);
396 felem_scalar(inx4, in, 4);
398 /* We have many cases where we want to do
399 * in[x] * in[y] +
400 * in[y] * in[x]
401 * This is obviously just
402 * 2 * in[x] * in[y]
403 * However, rather than do the doubling on the 128 bit result, we
404 * double one of the inputs to the multiplication by reading from
405 * |inx2| */
407 out[0] = ((uint128_t) in[0]) * in[0];
408 out[1] = ((uint128_t) in[0]) * inx2[1];
409 out[2] = ((uint128_t) in[0]) * inx2[2] +
410 ((uint128_t) in[1]) * in[1];
411 out[3] = ((uint128_t) in[0]) * inx2[3] +
412 ((uint128_t) in[1]) * inx2[2];
413 out[4] = ((uint128_t) in[0]) * inx2[4] +
414 ((uint128_t) in[1]) * inx2[3] +
415 ((uint128_t) in[2]) * in[2];
416 out[5] = ((uint128_t) in[0]) * inx2[5] +
417 ((uint128_t) in[1]) * inx2[4] +
418 ((uint128_t) in[2]) * inx2[3];
419 out[6] = ((uint128_t) in[0]) * inx2[6] +
420 ((uint128_t) in[1]) * inx2[5] +
421 ((uint128_t) in[2]) * inx2[4] +
422 ((uint128_t) in[3]) * in[3];
423 out[7] = ((uint128_t) in[0]) * inx2[7] +
424 ((uint128_t) in[1]) * inx2[6] +
425 ((uint128_t) in[2]) * inx2[5] +
426 ((uint128_t) in[3]) * inx2[4];
427 out[8] = ((uint128_t) in[0]) * inx2[8] +
428 ((uint128_t) in[1]) * inx2[7] +
429 ((uint128_t) in[2]) * inx2[6] +
430 ((uint128_t) in[3]) * inx2[5] +
431 ((uint128_t) in[4]) * in[4];
433 /* The remaining limbs fall above 2^521, with the first falling at
434 * 2^522. They correspond to locations one bit up from the limbs
435 * produced above so we would have to multiply by two to align them.
436 * Again, rather than operate on the 128-bit result, we double one of
437 * the inputs to the multiplication. If we want to double for both this
438 * reason, and the reason above, then we end up multiplying by four. */
440 /* 9 */
441 out[0] += ((uint128_t) in[1]) * inx4[8] +
442 ((uint128_t) in[2]) * inx4[7] +
443 ((uint128_t) in[3]) * inx4[6] +
444 ((uint128_t) in[4]) * inx4[5];
446 /* 10 */
447 out[1] += ((uint128_t) in[2]) * inx4[8] +
448 ((uint128_t) in[3]) * inx4[7] +
449 ((uint128_t) in[4]) * inx4[6] +
450 ((uint128_t) in[5]) * inx2[5];
452 /* 11 */
453 out[2] += ((uint128_t) in[3]) * inx4[8] +
454 ((uint128_t) in[4]) * inx4[7] +
455 ((uint128_t) in[5]) * inx4[6];
457 /* 12 */
458 out[3] += ((uint128_t) in[4]) * inx4[8] +
459 ((uint128_t) in[5]) * inx4[7] +
460 ((uint128_t) in[6]) * inx2[6];
462 /* 13 */
463 out[4] += ((uint128_t) in[5]) * inx4[8] +
464 ((uint128_t) in[6]) * inx4[7];
466 /* 14 */
467 out[5] += ((uint128_t) in[6]) * inx4[8] +
468 ((uint128_t) in[7]) * inx2[7];
470 /* 15 */
471 out[6] += ((uint128_t) in[7]) * inx4[8];
473 /* 16 */
474 out[7] += ((uint128_t) in[8]) * inx2[8];
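/* Where the extra factors of two come from: limb position 9 sits at
 *	2^(58*9) = 2^522 = 2 * 2^521 == 2 (mod 2^521 - 1),
 * so a product that lands at position 9+k folds down to position k with an
 * extra factor of 2. When that product is also one of the symmetric cross
 * terms it picks up the other factor of 2 as well, which is why the wrapped
 * cross terms above read from |inx4| and the wrapped squares from |inx2|. */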
477 /* felem_mul sets |out| = |in1| * |in2|
478 * On entry:
479 * in1[i] < 2^64
480 * in2[i] < 2^63
481 * On exit:
482 * out[i] < 17 * max(in1[i]) * max(in2[i])
484 static void felem_mul(largefelem out, const felem in1, const felem in2)
486 felem in2x2;
487 felem_scalar(in2x2, in2, 2);
489 out[0] = ((uint128_t) in1[0]) * in2[0];
491 out[1] = ((uint128_t) in1[0]) * in2[1] +
492 ((uint128_t) in1[1]) * in2[0];
494 out[2] = ((uint128_t) in1[0]) * in2[2] +
495 ((uint128_t) in1[1]) * in2[1] +
496 ((uint128_t) in1[2]) * in2[0];
498 out[3] = ((uint128_t) in1[0]) * in2[3] +
499 ((uint128_t) in1[1]) * in2[2] +
500 ((uint128_t) in1[2]) * in2[1] +
501 ((uint128_t) in1[3]) * in2[0];
503 out[4] = ((uint128_t) in1[0]) * in2[4] +
504 ((uint128_t) in1[1]) * in2[3] +
505 ((uint128_t) in1[2]) * in2[2] +
506 ((uint128_t) in1[3]) * in2[1] +
507 ((uint128_t) in1[4]) * in2[0];
509 out[5] = ((uint128_t) in1[0]) * in2[5] +
510 ((uint128_t) in1[1]) * in2[4] +
511 ((uint128_t) in1[2]) * in2[3] +
512 ((uint128_t) in1[3]) * in2[2] +
513 ((uint128_t) in1[4]) * in2[1] +
514 ((uint128_t) in1[5]) * in2[0];
516 out[6] = ((uint128_t) in1[0]) * in2[6] +
517 ((uint128_t) in1[1]) * in2[5] +
518 ((uint128_t) in1[2]) * in2[4] +
519 ((uint128_t) in1[3]) * in2[3] +
520 ((uint128_t) in1[4]) * in2[2] +
521 ((uint128_t) in1[5]) * in2[1] +
522 ((uint128_t) in1[6]) * in2[0];
524 out[7] = ((uint128_t) in1[0]) * in2[7] +
525 ((uint128_t) in1[1]) * in2[6] +
526 ((uint128_t) in1[2]) * in2[5] +
527 ((uint128_t) in1[3]) * in2[4] +
528 ((uint128_t) in1[4]) * in2[3] +
529 ((uint128_t) in1[5]) * in2[2] +
530 ((uint128_t) in1[6]) * in2[1] +
531 ((uint128_t) in1[7]) * in2[0];
533 out[8] = ((uint128_t) in1[0]) * in2[8] +
534 ((uint128_t) in1[1]) * in2[7] +
535 ((uint128_t) in1[2]) * in2[6] +
536 ((uint128_t) in1[3]) * in2[5] +
537 ((uint128_t) in1[4]) * in2[4] +
538 ((uint128_t) in1[5]) * in2[3] +
539 ((uint128_t) in1[6]) * in2[2] +
540 ((uint128_t) in1[7]) * in2[1] +
541 ((uint128_t) in1[8]) * in2[0];
543 /* See comment in felem_square about the use of in2x2 here */
545 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
546 ((uint128_t) in1[2]) * in2x2[7] +
547 ((uint128_t) in1[3]) * in2x2[6] +
548 ((uint128_t) in1[4]) * in2x2[5] +
549 ((uint128_t) in1[5]) * in2x2[4] +
550 ((uint128_t) in1[6]) * in2x2[3] +
551 ((uint128_t) in1[7]) * in2x2[2] +
552 ((uint128_t) in1[8]) * in2x2[1];
554 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
555 ((uint128_t) in1[3]) * in2x2[7] +
556 ((uint128_t) in1[4]) * in2x2[6] +
557 ((uint128_t) in1[5]) * in2x2[5] +
558 ((uint128_t) in1[6]) * in2x2[4] +
559 ((uint128_t) in1[7]) * in2x2[3] +
560 ((uint128_t) in1[8]) * in2x2[2];
562 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
563 ((uint128_t) in1[4]) * in2x2[7] +
564 ((uint128_t) in1[5]) * in2x2[6] +
565 ((uint128_t) in1[6]) * in2x2[5] +
566 ((uint128_t) in1[7]) * in2x2[4] +
567 ((uint128_t) in1[8]) * in2x2[3];
569 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
570 ((uint128_t) in1[5]) * in2x2[7] +
571 ((uint128_t) in1[6]) * in2x2[6] +
572 ((uint128_t) in1[7]) * in2x2[5] +
573 ((uint128_t) in1[8]) * in2x2[4];
575 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
576 ((uint128_t) in1[6]) * in2x2[7] +
577 ((uint128_t) in1[7]) * in2x2[6] +
578 ((uint128_t) in1[8]) * in2x2[5];
580 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
581 ((uint128_t) in1[7]) * in2x2[7] +
582 ((uint128_t) in1[8]) * in2x2[6];
584 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
585 ((uint128_t) in1[8]) * in2x2[7];
587 out[7] += ((uint128_t) in1[8]) * in2x2[8];
590 static const limb bottom52bits = 0xfffffffffffff;
592 /* felem_reduce converts a largefelem to an felem.
593 * On entry:
594 * in[i] < 2^128
595 * On exit:
596 * out[i] < 2^59 + 2^14
598 static void felem_reduce(felem out, const largefelem in)
600 u64 overflow1, overflow2;
602 out[0] = ((limb) in[0]) & bottom58bits;
603 out[1] = ((limb) in[1]) & bottom58bits;
604 out[2] = ((limb) in[2]) & bottom58bits;
605 out[3] = ((limb) in[3]) & bottom58bits;
606 out[4] = ((limb) in[4]) & bottom58bits;
607 out[5] = ((limb) in[5]) & bottom58bits;
608 out[6] = ((limb) in[6]) & bottom58bits;
609 out[7] = ((limb) in[7]) & bottom58bits;
610 out[8] = ((limb) in[8]) & bottom58bits;
612 /* out[i] < 2^58 */
614 out[1] += ((limb) in[0]) >> 58;
615 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
616 /* out[1] < 2^58 + 2^6 + 2^58
617 * = 2^59 + 2^6 */
618 out[2] += ((limb) (in[0] >> 64)) >> 52;
620 out[2] += ((limb) in[1]) >> 58;
621 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
622 out[3] += ((limb) (in[1] >> 64)) >> 52;
624 out[3] += ((limb) in[2]) >> 58;
625 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
626 out[4] += ((limb) (in[2] >> 64)) >> 52;
628 out[4] += ((limb) in[3]) >> 58;
629 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
630 out[5] += ((limb) (in[3] >> 64)) >> 52;
632 out[5] += ((limb) in[4]) >> 58;
633 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
634 out[6] += ((limb) (in[4] >> 64)) >> 52;
636 out[6] += ((limb) in[5]) >> 58;
637 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
638 out[7] += ((limb) (in[5] >> 64)) >> 52;
640 out[7] += ((limb) in[6]) >> 58;
641 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
642 out[8] += ((limb) (in[6] >> 64)) >> 52;
644 out[8] += ((limb) in[7]) >> 58;
645 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
646 /* out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
647 * < 2^59 + 2^13 */
648 overflow1 = ((limb) (in[7] >> 64)) >> 52;
650 overflow1 += ((limb) in[8]) >> 58;
651 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
652 overflow2 = ((limb) (in[8] >> 64)) >> 52;
654 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
655 overflow2 <<= 1; /* overflow2 < 2^13 */
657 out[0] += overflow1; /* out[0] < 2^60 */
658 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
660 out[1] += out[0] >> 58; out[0] &= bottom58bits;
661 /* out[0] < 2^58
662 * out[1] < 2^59 + 2^6 + 2^13 + 2^2
663 * < 2^59 + 2^14 */
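/* The carry pass above simply splits each 128-bit input limb at the 58-bit
 * boundaries:
 *	in[i] = a + b*2^58 + c*2^116,  with a < 2^58, b < 2^58, c < 2^12,
 * keeping a at position i, adding b at position i+1 and c at position i+2.
 * The pieces that would land at positions 9 and 10 (overflow1, overflow2)
 * wrap back to positions 0 and 1, doubled because 2^522 == 2 (mod p). */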
666 static void felem_square_reduce(felem out, const felem in)
668 largefelem tmp;
669 felem_square(tmp, in);
670 felem_reduce(out, tmp);
673 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
675 largefelem tmp;
676 felem_mul(tmp, in1, in2);
677 felem_reduce(out, tmp);
680 /* felem_inv calculates |out| = |in|^{-1}
682 * Based on Fermat's Little Theorem:
683 * a^p = a (mod p)
684 * a^{p-1} = 1 (mod p)
685 * a^{p-2} = a^{-1} (mod p)
687 static void felem_inv(felem out, const felem in)
689 felem ftmp, ftmp2, ftmp3, ftmp4;
690 largefelem tmp;
691 unsigned i;
693 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
694 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
695 felem_assign(ftmp2, ftmp);
696 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
697 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */
698 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */
700 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
701 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
702 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
704 felem_assign(ftmp2, ftmp3);
705 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
706 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
707 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
708 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
709 felem_assign(ftmp4, ftmp3);
710 felem_mul(tmp, ftmp3, ftmp); felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
711 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
712 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
713 felem_assign(ftmp2, ftmp3);
715 for (i = 0; i < 8; i++)
717 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
719 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
720 felem_assign(ftmp2, ftmp3);
722 for (i = 0; i < 16; i++)
724 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
726 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
727 felem_assign(ftmp2, ftmp3);
729 for (i = 0; i < 32; i++)
731 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
733 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
734 felem_assign(ftmp2, ftmp3);
736 for (i = 0; i < 64; i++)
738 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
740 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
741 felem_assign(ftmp2, ftmp3);
743 for (i = 0; i < 128; i++)
745 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
747 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
748 felem_assign(ftmp2, ftmp3);
750 for (i = 0; i < 256; i++)
752 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
754 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
756 for (i = 0; i < 9; i++)
758 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
760 	felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^521 - 2^2 */
761 	felem_mul(tmp, ftmp3, in); felem_reduce(out, tmp); /* 2^521 - 3 */
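/* The chain above is square-and-multiply for the fixed exponent
 * p - 2 = 2^521 - 3. A hypothetical sanity check (not part of this file;
 * assumes the static helpers here are visible, and note that felem_contract
 * is only defined further below, so compiling it at this point would need a
 * forward declaration): a nonzero element times its inverse should contract
 * to one. */

static int check_felem_inv(const felem a)
	{
	felem ainv, prod, one;
	felem_inv(ainv, a);
	felem_mul_reduce(prod, a, ainv);
	felem_contract(prod, prod);	/* unique minimal representation */
	felem_one(one);
	return memcmp(prod, one, sizeof(felem)) == 0;	/* 1 on success */
	}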
764 /* This is 2^521-1, expressed as an felem */
765 static const felem kPrime =
767 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
768 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
769 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
772 /* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
773 * otherwise.
774 * On entry:
775 * in[i] < 2^59 + 2^14
777 static limb felem_is_zero(const felem in)
779 felem ftmp;
780 limb is_zero, is_p;
781 felem_assign(ftmp, in);
783 ftmp[0] += ftmp[8] >> 57; ftmp[8] &= bottom57bits;
784 /* ftmp[8] < 2^57 */
785 ftmp[1] += ftmp[0] >> 58; ftmp[0] &= bottom58bits;
786 ftmp[2] += ftmp[1] >> 58; ftmp[1] &= bottom58bits;
787 ftmp[3] += ftmp[2] >> 58; ftmp[2] &= bottom58bits;
788 ftmp[4] += ftmp[3] >> 58; ftmp[3] &= bottom58bits;
789 ftmp[5] += ftmp[4] >> 58; ftmp[4] &= bottom58bits;
790 ftmp[6] += ftmp[5] >> 58; ftmp[5] &= bottom58bits;
791 ftmp[7] += ftmp[6] >> 58; ftmp[6] &= bottom58bits;
792 ftmp[8] += ftmp[7] >> 58; ftmp[7] &= bottom58bits;
793 /* ftmp[8] < 2^57 + 4 */
795 /* The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
796 * greater than our bound for ftmp[8]. Therefore we only have to check
797 	 * if the value is zero or 2^521-1. */
799 is_zero = 0;
800 is_zero |= ftmp[0];
801 is_zero |= ftmp[1];
802 is_zero |= ftmp[2];
803 is_zero |= ftmp[3];
804 is_zero |= ftmp[4];
805 is_zero |= ftmp[5];
806 is_zero |= ftmp[6];
807 is_zero |= ftmp[7];
808 is_zero |= ftmp[8];
810 is_zero--;
811 /* We know that ftmp[i] < 2^63, therefore the only way that the top bit
812 * can be set is if is_zero was 0 before the decrement. */
813 is_zero = ((s64) is_zero) >> 63;
815 is_p = ftmp[0] ^ kPrime[0];
816 is_p |= ftmp[1] ^ kPrime[1];
817 is_p |= ftmp[2] ^ kPrime[2];
818 is_p |= ftmp[3] ^ kPrime[3];
819 is_p |= ftmp[4] ^ kPrime[4];
820 is_p |= ftmp[5] ^ kPrime[5];
821 is_p |= ftmp[6] ^ kPrime[6];
822 is_p |= ftmp[7] ^ kPrime[7];
823 is_p |= ftmp[8] ^ kPrime[8];
825 is_p--;
826 is_p = ((s64) is_p) >> 63;
828 is_zero |= is_p;
829 return is_zero;
832 static int felem_is_zero_int(const felem in)
834 return (int) (felem_is_zero(in) & ((limb)1));
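/* Restating the mask idiom used above as a standalone helper (hypothetical,
 * not part of this file): it only works because every word fed into it is
 * known to be below 2^63, and it assumes arithmetic right shift of negative
 * s64 values (true for gcc/clang). */

static limb ct_is_zero_mask(limb x)
	{
	/* precondition: x < 2^63 */
	x--;					/* top bit becomes set only if x was 0 */
	return (limb) (((s64) x) >> 63);	/* smear the top bit across the word */
	}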
837 /* felem_contract converts |in| to its unique, minimal representation.
838 * On entry:
839 * in[i] < 2^59 + 2^14
841 static void felem_contract(felem out, const felem in)
843 limb is_p, is_greater, sign;
844 static const limb two58 = ((limb)1) << 58;
846 felem_assign(out, in);
848 out[0] += out[8] >> 57; out[8] &= bottom57bits;
849 /* out[8] < 2^57 */
850 out[1] += out[0] >> 58; out[0] &= bottom58bits;
851 out[2] += out[1] >> 58; out[1] &= bottom58bits;
852 out[3] += out[2] >> 58; out[2] &= bottom58bits;
853 out[4] += out[3] >> 58; out[3] &= bottom58bits;
854 out[5] += out[4] >> 58; out[4] &= bottom58bits;
855 out[6] += out[5] >> 58; out[5] &= bottom58bits;
856 out[7] += out[6] >> 58; out[6] &= bottom58bits;
857 out[8] += out[7] >> 58; out[7] &= bottom58bits;
858 /* out[8] < 2^57 + 4 */
860 /* If the value is greater than 2^521-1 then we have to subtract
861 * 2^521-1 out. See the comments in felem_is_zero regarding why we
862 * don't test for other multiples of the prime. */
864 /* First, if |out| is equal to 2^521-1, we subtract it out to get zero. */
866 is_p = out[0] ^ kPrime[0];
867 is_p |= out[1] ^ kPrime[1];
868 is_p |= out[2] ^ kPrime[2];
869 is_p |= out[3] ^ kPrime[3];
870 is_p |= out[4] ^ kPrime[4];
871 is_p |= out[5] ^ kPrime[5];
872 is_p |= out[6] ^ kPrime[6];
873 is_p |= out[7] ^ kPrime[7];
874 is_p |= out[8] ^ kPrime[8];
876 is_p--;
877 is_p &= is_p << 32;
878 is_p &= is_p << 16;
879 is_p &= is_p << 8;
880 is_p &= is_p << 4;
881 is_p &= is_p << 2;
882 is_p &= is_p << 1;
883 is_p = ((s64) is_p) >> 63;
884 is_p = ~is_p;
886 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
888 out[0] &= is_p;
889 out[1] &= is_p;
890 out[2] &= is_p;
891 out[3] &= is_p;
892 out[4] &= is_p;
893 out[5] &= is_p;
894 out[6] &= is_p;
895 out[7] &= is_p;
896 out[8] &= is_p;
898 /* In order to test that |out| >= 2^521-1 we need only test if out[8]
899 * >> 57 is greater than zero as (2^521-1) + x >= 2^522 */
900 is_greater = out[8] >> 57;
901 is_greater |= is_greater << 32;
902 is_greater |= is_greater << 16;
903 is_greater |= is_greater << 8;
904 is_greater |= is_greater << 4;
905 is_greater |= is_greater << 2;
906 is_greater |= is_greater << 1;
907 is_greater = ((s64) is_greater) >> 63;
909 out[0] -= kPrime[0] & is_greater;
910 out[1] -= kPrime[1] & is_greater;
911 out[2] -= kPrime[2] & is_greater;
912 out[3] -= kPrime[3] & is_greater;
913 out[4] -= kPrime[4] & is_greater;
914 out[5] -= kPrime[5] & is_greater;
915 out[6] -= kPrime[6] & is_greater;
916 out[7] -= kPrime[7] & is_greater;
917 out[8] -= kPrime[8] & is_greater;
919 /* Eliminate negative coefficients */
920 sign = -(out[0] >> 63); out[0] += (two58 & sign); out[1] -= (1 & sign);
921 sign = -(out[1] >> 63); out[1] += (two58 & sign); out[2] -= (1 & sign);
922 sign = -(out[2] >> 63); out[2] += (two58 & sign); out[3] -= (1 & sign);
923 sign = -(out[3] >> 63); out[3] += (two58 & sign); out[4] -= (1 & sign);
924 sign = -(out[4] >> 63); out[4] += (two58 & sign); out[5] -= (1 & sign);
925 	sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
926 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
927 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
928 sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
929 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
930 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
933 /* Group operations
934 * ----------------
936 * Building on top of the field operations we have the operations on the
937 * elliptic curve group itself. Points on the curve are represented in Jacobian
938 * coordinates */
940 /* point_double calculates 2*(x_in, y_in, z_in)
942 * The method is taken from:
943 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
945  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
946 * while x_out == y_in is not (maybe this works, but it's not tested). */
947 static void
948 point_double(felem x_out, felem y_out, felem z_out,
949 const felem x_in, const felem y_in, const felem z_in)
951 largefelem tmp, tmp2;
952 felem delta, gamma, beta, alpha, ftmp, ftmp2;
954 felem_assign(ftmp, x_in);
955 felem_assign(ftmp2, x_in);
957 /* delta = z^2 */
958 felem_square(tmp, z_in);
959 felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
961 /* gamma = y^2 */
962 felem_square(tmp, y_in);
963 felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
965 /* beta = x*gamma */
966 felem_mul(tmp, x_in, gamma);
967 felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */
969 /* alpha = 3*(x-delta)*(x+delta) */
970 felem_diff64(ftmp, delta);
971 /* ftmp[i] < 2^61 */
972 felem_sum64(ftmp2, delta);
973 /* ftmp2[i] < 2^60 + 2^15 */
974 felem_scalar64(ftmp2, 3);
975 /* ftmp2[i] < 3*2^60 + 3*2^15 */
976 felem_mul(tmp, ftmp, ftmp2);
977 /* tmp[i] < 17(3*2^121 + 3*2^76)
978 * = 61*2^121 + 61*2^76
979 * < 64*2^121 + 64*2^76
980 * = 2^127 + 2^82
981 * < 2^128 */
982 felem_reduce(alpha, tmp);
984 /* x' = alpha^2 - 8*beta */
985 felem_square(tmp, alpha);
986 /* tmp[i] < 17*2^120
987 * < 2^125 */
988 felem_assign(ftmp, beta);
989 felem_scalar64(ftmp, 8);
990 /* ftmp[i] < 2^62 + 2^17 */
991 felem_diff_128_64(tmp, ftmp);
992 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
993 felem_reduce(x_out, tmp);
995 /* z' = (y + z)^2 - gamma - delta */
996 felem_sum64(delta, gamma);
997 /* delta[i] < 2^60 + 2^15 */
998 felem_assign(ftmp, y_in);
999 felem_sum64(ftmp, z_in);
1000 /* ftmp[i] < 2^60 + 2^15 */
1001 felem_square(tmp, ftmp);
1002 /* tmp[i] < 17(2^122)
1003 * < 2^127 */
1004 felem_diff_128_64(tmp, delta);
1005 /* tmp[i] < 2^127 + 2^63 */
1006 felem_reduce(z_out, tmp);
1008 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1009 felem_scalar64(beta, 4);
1010 /* beta[i] < 2^61 + 2^16 */
1011 felem_diff64(beta, x_out);
1012 /* beta[i] < 2^61 + 2^60 + 2^16 */
1013 felem_mul(tmp, alpha, beta);
1014 /* tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
1015 * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
1016 * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1017 * < 2^128 */
1018 felem_square(tmp2, gamma);
1019 /* tmp2[i] < 17*(2^59 + 2^14)^2
1020 * = 17*(2^118 + 2^74 + 2^28) */
1021 felem_scalar128(tmp2, 8);
1022 /* tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
1023 * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
1024 * < 2^126 */
1025 felem_diff128(tmp, tmp2);
1026 /* tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1027 * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
1028 * 2^74 + 2^69 + 2^34 + 2^30
1029 * < 2^128 */
1030 felem_reduce(y_out, tmp);
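/* For reference, the dbl-2001-b formulas the code above implements (valid
 * for a = -3 curves), in the same names:
 *	delta = Z1^2
 *	gamma = Y1^2
 *	beta  = X1*gamma
 *	alpha = 3*(X1 - delta)*(X1 + delta)
 *	X3 = alpha^2 - 8*beta
 *	Z3 = (Y1 + Z1)^2 - gamma - delta
 *	Y3 = alpha*(4*beta - X3) - 8*gamma^2 */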
1033 /* copy_conditional copies in to out iff mask is all ones. */
1034 static void
1035 copy_conditional(felem out, const felem in, limb mask)
1037 unsigned i;
1038 for (i = 0; i < NLIMBS; ++i)
1040 const limb tmp = mask & (in[i] ^ out[i]);
1041 out[i] ^= tmp;
1045 /* point_add calculates (x1, y1, z1) + (x2, y2, z2)
1047 * The method is taken from
1048 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1049 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1051 * This function includes a branch for checking whether the two input points
1052 * are equal (while not equal to the point at infinity). This case never
1053 * happens during single point multiplication, so there is no timing leak for
1054 * ECDH or ECDSA signing. */
1055 static void point_add(felem x3, felem y3, felem z3,
1056 const felem x1, const felem y1, const felem z1,
1057 const int mixed, const felem x2, const felem y2, const felem z2)
1059 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1060 largefelem tmp, tmp2;
1061 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1063 z1_is_zero = felem_is_zero(z1);
1064 z2_is_zero = felem_is_zero(z2);
1066 /* ftmp = z1z1 = z1**2 */
1067 felem_square(tmp, z1);
1068 felem_reduce(ftmp, tmp);
1070 if (!mixed)
1072 /* ftmp2 = z2z2 = z2**2 */
1073 felem_square(tmp, z2);
1074 felem_reduce(ftmp2, tmp);
1076 /* u1 = ftmp3 = x1*z2z2 */
1077 felem_mul(tmp, x1, ftmp2);
1078 felem_reduce(ftmp3, tmp);
1080 /* ftmp5 = z1 + z2 */
1081 felem_assign(ftmp5, z1);
1082 felem_sum64(ftmp5, z2);
1083 /* ftmp5[i] < 2^61 */
1085 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1086 felem_square(tmp, ftmp5);
1087 /* tmp[i] < 17*2^122 */
1088 felem_diff_128_64(tmp, ftmp);
1089 /* tmp[i] < 17*2^122 + 2^63 */
1090 felem_diff_128_64(tmp, ftmp2);
1091 /* tmp[i] < 17*2^122 + 2^64 */
1092 felem_reduce(ftmp5, tmp);
1094 /* ftmp2 = z2 * z2z2 */
1095 felem_mul(tmp, ftmp2, z2);
1096 felem_reduce(ftmp2, tmp);
1098 /* s1 = ftmp6 = y1 * z2**3 */
1099 felem_mul(tmp, y1, ftmp2);
1100 felem_reduce(ftmp6, tmp);
1102 else
1104 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1106 /* u1 = ftmp3 = x1*z2z2 */
1107 felem_assign(ftmp3, x1);
1109 /* ftmp5 = 2*z1z2 */
1110 felem_scalar(ftmp5, z1, 2);
1112 /* s1 = ftmp6 = y1 * z2**3 */
1113 felem_assign(ftmp6, y1);
1116 /* u2 = x2*z1z1 */
1117 felem_mul(tmp, x2, ftmp);
1118 /* tmp[i] < 17*2^120 */
1120 /* h = ftmp4 = u2 - u1 */
1121 felem_diff_128_64(tmp, ftmp3);
1122 /* tmp[i] < 17*2^120 + 2^63 */
1123 felem_reduce(ftmp4, tmp);
1125 x_equal = felem_is_zero(ftmp4);
1127 /* z_out = ftmp5 * h */
1128 felem_mul(tmp, ftmp5, ftmp4);
1129 felem_reduce(z_out, tmp);
1131 /* ftmp = z1 * z1z1 */
1132 felem_mul(tmp, ftmp, z1);
1133 felem_reduce(ftmp, tmp);
1135 /* s2 = tmp = y2 * z1**3 */
1136 felem_mul(tmp, y2, ftmp);
1137 /* tmp[i] < 17*2^120 */
1139 /* r = ftmp5 = (s2 - s1)*2 */
1140 felem_diff_128_64(tmp, ftmp6);
1141 /* tmp[i] < 17*2^120 + 2^63 */
1142 felem_reduce(ftmp5, tmp);
1143 y_equal = felem_is_zero(ftmp5);
1144 felem_scalar64(ftmp5, 2);
1145 /* ftmp5[i] < 2^61 */
1147 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1149 point_double(x3, y3, z3, x1, y1, z1);
1150 return;
1153 /* I = ftmp = (2h)**2 */
1154 felem_assign(ftmp, ftmp4);
1155 felem_scalar64(ftmp, 2);
1156 /* ftmp[i] < 2^61 */
1157 felem_square(tmp, ftmp);
1158 /* tmp[i] < 17*2^122 */
1159 felem_reduce(ftmp, tmp);
1161 /* J = ftmp2 = h * I */
1162 felem_mul(tmp, ftmp4, ftmp);
1163 felem_reduce(ftmp2, tmp);
1165 /* V = ftmp4 = U1 * I */
1166 felem_mul(tmp, ftmp3, ftmp);
1167 felem_reduce(ftmp4, tmp);
1169 /* x_out = r**2 - J - 2V */
1170 felem_square(tmp, ftmp5);
1171 /* tmp[i] < 17*2^122 */
1172 felem_diff_128_64(tmp, ftmp2);
1173 /* tmp[i] < 17*2^122 + 2^63 */
1174 felem_assign(ftmp3, ftmp4);
1175 felem_scalar64(ftmp4, 2);
1176 /* ftmp4[i] < 2^61 */
1177 felem_diff_128_64(tmp, ftmp4);
1178 /* tmp[i] < 17*2^122 + 2^64 */
1179 felem_reduce(x_out, tmp);
1181 /* y_out = r(V-x_out) - 2 * s1 * J */
1182 felem_diff64(ftmp3, x_out);
1183 /* ftmp3[i] < 2^60 + 2^60
1184 * = 2^61 */
1185 felem_mul(tmp, ftmp5, ftmp3);
1186 /* tmp[i] < 17*2^122 */
1187 felem_mul(tmp2, ftmp6, ftmp2);
1188 /* tmp2[i] < 17*2^120 */
1189 felem_scalar128(tmp2, 2);
1190 /* tmp2[i] < 17*2^121 */
1191 felem_diff128(tmp, tmp2);
1192 /* tmp[i] < 2^127 - 2^69 + 17*2^122
1193 * = 2^126 - 2^122 - 2^6 - 2^2 - 1
1194 * < 2^127 */
1195 felem_reduce(y_out, tmp);
1197 copy_conditional(x_out, x2, z1_is_zero);
1198 copy_conditional(x_out, x1, z2_is_zero);
1199 copy_conditional(y_out, y2, z1_is_zero);
1200 copy_conditional(y_out, y1, z2_is_zero);
1201 copy_conditional(z_out, z2, z1_is_zero);
1202 copy_conditional(z_out, z1, z2_is_zero);
1203 felem_assign(x3, x_out);
1204 felem_assign(y3, y_out);
1205 felem_assign(z3, z_out);
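/* For reference, the add-2007-bl quantities as they appear in the comments
 * above (general case; the mixed path simply takes Z2 = 1):
 *	Z1Z1 = Z1^2, Z2Z2 = Z2^2
 *	U1 = X1*Z2Z2, U2 = X2*Z1Z1
 *	S1 = Y1*Z2^3, S2 = Y2*Z1^3
 *	H = U2 - U1, I = (2*H)^2, J = H*I
 *	r = 2*(S2 - S1), V = U1*I
 *	X3 = r^2 - J - 2*V
 *	Y3 = r*(V - X3) - 2*S1*J
 *	Z3 = ((Z1 + Z2)^2 - Z1Z1 - Z2Z2) * H */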
1208 /* Base point pre computation
1209 * --------------------------
1211 * Two different sorts of precomputed tables are used in the following code.
1212  * Each contains various points on the curve, where each point is three field
1213 * elements (x, y, z).
1215 * For the base point table, z is usually 1 (0 for the point at infinity).
1216 * This table has 16 elements:
1217 * index | bits | point
1218 * ------+---------+------------------------------
1219 * 0 | 0 0 0 0 | 0G
1220 * 1 | 0 0 0 1 | 1G
1221 * 2 | 0 0 1 0 | 2^130G
1222 * 3 | 0 0 1 1 | (2^130 + 1)G
1223 * 4 | 0 1 0 0 | 2^260G
1224 * 5 | 0 1 0 1 | (2^260 + 1)G
1225 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1226 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1227 * 8 | 1 0 0 0 | 2^390G
1228 * 9 | 1 0 0 1 | (2^390 + 1)G
1229 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1230 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1231 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1232 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1233 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1234 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1236 * The reason for this is so that we can clock bits into four different
1237 * locations when doing simple scalar multiplies against the base point.
1239 * Tables for other points have table[i] = iG for i in 0 .. 16. */
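/* Concretely, in batch_mul below the table index for the generator half at
 * step i is assembled from four scalar bits taken 130 positions apart:
 *	index = 8*bit(i+390) + 4*bit(i+260) + 2*bit(i+130) + bit(i)
 * (the lower three bits only for i < 130), so one pass over i = 130..0
 * clocks all 521 bits of the scalar through the 16-entry table above. */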
1241 /* gmul is the table of precomputed base points */
1242 static const felem gmul[16][3] =
1243 {{{0, 0, 0, 0, 0, 0, 0, 0, 0},
1244 {0, 0, 0, 0, 0, 0, 0, 0, 0},
1245 {0, 0, 0, 0, 0, 0, 0, 0, 0}},
1246 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1247 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
1248 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1249 {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1250 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
1251 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1252 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1253 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1254 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
1255 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1256 {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1257 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
1258 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1259 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1260 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1261 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
1262 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1263 {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1264 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
1265 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1266 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1267 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1268 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
1269 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1270 {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1271 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
1272 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1273 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1274 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1275 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
1276 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1277 {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1278 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
1279 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1280 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1281 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1282 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
1283 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1284 {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1285 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
1286 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1287 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1288 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1289 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1290 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1291 {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1292 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1293 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1294 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1295 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1296 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1297 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1298 {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1299 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1300 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1301 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1302 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1303 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1304 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1305 {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1306 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1307 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1308 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1309 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1310 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1311 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1312 {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1313 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1314 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1315 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1316 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1317 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1318 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1319 {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1320 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1321 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1322 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1323 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1324 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1325 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1326 {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1327 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1328 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1329 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1330 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1331 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1332 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1333 {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1334 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1335 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1336 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1337 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1338 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1339 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1340 {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1341 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1342 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1343 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1344 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1345 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1346 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1347 {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1348 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1349 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1350 {1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1352 /* select_point selects the |idx|th point from a precomputation table and
1353 * copies it to out. */
1354 static void select_point(const limb idx, unsigned int size, const felem pre_comp[/* size */][3],
1355 felem out[3])
1357 unsigned i, j;
1358 limb *outlimbs = &out[0][0];
1359 memset(outlimbs, 0, 3 * sizeof(felem));
1361 for (i = 0; i < size; i++)
1363 const limb *inlimbs = &pre_comp[i][0][0];
1364 limb mask = i ^ idx;
1365 mask |= mask >> 4;
1366 mask |= mask >> 2;
1367 mask |= mask >> 1;
1368 mask &= 1;
1369 mask--;
1370 for (j = 0; j < NLIMBS * 3; j++)
1371 outlimbs[j] |= inlimbs[j] & mask;
1375 /* get_bit returns the |i|th bit in |in| */
1376 static char get_bit(const felem_bytearray in, int i)
1378 if (i < 0)
1379 return 0;
1380 return (in[i >> 3] >> (i & 7)) & 1;
1383 /* Interleaved point multiplication using precomputed point multiples:
1384 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1385 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1386 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1387 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1388 static void batch_mul(felem x_out, felem y_out, felem z_out,
1389 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1390 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3])
1392 int i, skip;
1393 unsigned num, gen_mul = (g_scalar != NULL);
1394 felem nq[3], tmp[4];
1395 limb bits;
1396 u8 sign, digit;
1398 /* set nq to the point at infinity */
1399 memset(nq, 0, 3 * sizeof(felem));
1401 /* Loop over all scalars msb-to-lsb, interleaving additions
1402 * of multiples of the generator (last quarter of rounds)
1403 	 * and additions of the other points' multiples (every 5th round).
1405 skip = 1; /* save two point operations in the first round */
1406 for (i = (num_points ? 520 : 130); i >= 0; --i)
1408 /* double */
1409 if (!skip)
1410 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1412 /* add multiples of the generator */
1413 if (gen_mul && (i <= 130))
1415 bits = get_bit(g_scalar, i + 390) << 3;
1416 if (i < 130)
1418 bits |= get_bit(g_scalar, i + 260) << 2;
1419 bits |= get_bit(g_scalar, i + 130) << 1;
1420 bits |= get_bit(g_scalar, i);
1422 /* select the point to add, in constant time */
1423 select_point(bits, 16, g_pre_comp, tmp);
1424 if (!skip)
1426 point_add(nq[0], nq[1], nq[2],
1427 nq[0], nq[1], nq[2],
1428 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1430 else
1432 memcpy(nq, tmp, 3 * sizeof(felem));
1433 skip = 0;
1437 /* do other additions every 5 doublings */
1438 if (num_points && (i % 5 == 0))
1440 /* loop over all scalars */
1441 for (num = 0; num < num_points; ++num)
1443 bits = get_bit(scalars[num], i + 4) << 5;
1444 bits |= get_bit(scalars[num], i + 3) << 4;
1445 bits |= get_bit(scalars[num], i + 2) << 3;
1446 bits |= get_bit(scalars[num], i + 1) << 2;
1447 bits |= get_bit(scalars[num], i) << 1;
1448 bits |= get_bit(scalars[num], i - 1);
1449 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1451 /* select the point to add or subtract, in constant time */
1452 select_point(digit, 17, pre_comp[num], tmp);
1453 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
1454 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1456 if (!skip)
1458 point_add(nq[0], nq[1], nq[2],
1459 nq[0], nq[1], nq[2],
1460 mixed, tmp[0], tmp[1], tmp[2]);
1462 else
1464 memcpy(nq, tmp, 3 * sizeof(felem));
1465 skip = 0;
1470 felem_assign(x_out, nq[0]);
1471 felem_assign(y_out, nq[1]);
1472 felem_assign(z_out, nq[2]);
1476 /* Precomputation for the group generator. */
1477 typedef struct {
1478 felem g_pre_comp[16][3];
1479 int references;
1480 } NISTP521_PRE_COMP;
1482 const EC_METHOD *EC_GFp_nistp521_method(void)
1484 static const EC_METHOD ret = {
1485 EC_FLAGS_DEFAULT_OCT,
1486 NID_X9_62_prime_field,
1487 ec_GFp_nistp521_group_init,
1488 ec_GFp_simple_group_finish,
1489 ec_GFp_simple_group_clear_finish,
1490 ec_GFp_nist_group_copy,
1491 ec_GFp_nistp521_group_set_curve,
1492 ec_GFp_simple_group_get_curve,
1493 ec_GFp_simple_group_get_degree,
1494 ec_GFp_simple_group_check_discriminant,
1495 ec_GFp_simple_point_init,
1496 ec_GFp_simple_point_finish,
1497 ec_GFp_simple_point_clear_finish,
1498 ec_GFp_simple_point_copy,
1499 ec_GFp_simple_point_set_to_infinity,
1500 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1501 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1502 ec_GFp_simple_point_set_affine_coordinates,
1503 ec_GFp_nistp521_point_get_affine_coordinates,
1504 0 /* point_set_compressed_coordinates */,
1505 0 /* point2oct */,
1506 0 /* oct2point */,
1507 ec_GFp_simple_add,
1508 ec_GFp_simple_dbl,
1509 ec_GFp_simple_invert,
1510 ec_GFp_simple_is_at_infinity,
1511 ec_GFp_simple_is_on_curve,
1512 ec_GFp_simple_cmp,
1513 ec_GFp_simple_make_affine,
1514 ec_GFp_simple_points_make_affine,
1515 ec_GFp_nistp521_points_mul,
1516 ec_GFp_nistp521_precompute_mult,
1517 ec_GFp_nistp521_have_precompute_mult,
1518 ec_GFp_nist_field_mul,
1519 ec_GFp_nist_field_sqr,
1520 0 /* field_div */,
1521 0 /* field_encode */,
1522 0 /* field_decode */,
1523 0 /* field_set_to_one */ };
1525 return &ret;
1529 /******************************************************************************/
1530 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1533 static NISTP521_PRE_COMP *nistp521_pre_comp_new()
1535 NISTP521_PRE_COMP *ret = NULL;
1536 ret = (NISTP521_PRE_COMP *)OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
1537 if (!ret)
1539 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1540 return ret;
1542 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1543 ret->references = 1;
1544 return ret;
1547 static void *nistp521_pre_comp_dup(void *src_)
1549 NISTP521_PRE_COMP *src = src_;
1551 /* no need to actually copy, these objects never change! */
1552 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1554 return src_;
1557 static void nistp521_pre_comp_free(void *pre_)
1559 int i;
1560 NISTP521_PRE_COMP *pre = pre_;
1562 if (!pre)
1563 return;
1565 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1566 if (i > 0)
1567 return;
1569 OPENSSL_free(pre);
1572 static void nistp521_pre_comp_clear_free(void *pre_)
1574 int i;
1575 NISTP521_PRE_COMP *pre = pre_;
1577 if (!pre)
1578 return;
1580 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1581 if (i > 0)
1582 return;
1584 OPENSSL_cleanse(pre, sizeof(*pre));
1585 OPENSSL_free(pre);
1588 /******************************************************************************/
1589 /* OPENSSL EC_METHOD FUNCTIONS
1592 int ec_GFp_nistp521_group_init(EC_GROUP *group)
1594 int ret;
1595 ret = ec_GFp_simple_group_init(group);
1596 group->a_is_minus3 = 1;
1597 return ret;
1600 int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1601 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1603 int ret = 0;
1604 BN_CTX *new_ctx = NULL;
1605 BIGNUM *curve_p, *curve_a, *curve_b;
1607 if (ctx == NULL)
1608 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1609 BN_CTX_start(ctx);
1610 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1611 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1612 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1613 BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1614 BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1615 BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1616 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1617 (BN_cmp(curve_b, b)))
1619 ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
1620 EC_R_WRONG_CURVE_PARAMETERS);
1621 goto err;
1623 group->field_mod_func = BN_nist_mod_521;
1624 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1625 err:
1626 BN_CTX_end(ctx);
1627 if (new_ctx != NULL)
1628 BN_CTX_free(new_ctx);
1629 return ret;
1632 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1633 * (X', Y') = (X/Z^2, Y/Z^3) */
1634 int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
1635 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1637 felem z1, z2, x_in, y_in, x_out, y_out;
1638 largefelem tmp;
1640 if (EC_POINT_is_at_infinity(group, point))
1642 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1643 EC_R_POINT_AT_INFINITY);
1644 return 0;
1646 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1647 (!BN_to_felem(z1, &point->Z))) return 0;
1648 felem_inv(z2, z1);
1649 felem_square(tmp, z2); felem_reduce(z1, tmp);
1650 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1651 felem_contract(x_out, x_in);
1652 if (x != NULL)
1654 if (!felem_to_BN(x, x_out))
1656 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1657 return 0;
1660 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1661 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1662 felem_contract(y_out, y_in);
1663 if (y != NULL)
1665 if (!felem_to_BN(y, y_out))
1667 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1668 return 0;
1671 return 1;
1674 static void make_points_affine(size_t num, felem points[/* num */][3], felem tmp_felems[/* num+1 */])
1676 /* Runs in constant time, unless an input is the point at infinity
1677 * (which normally shouldn't happen). */
1678 ec_GFp_nistp_points_make_affine_internal(
1679 num,
1680 points,
1681 sizeof(felem),
1682 tmp_felems,
1683 (void (*)(void *)) felem_one,
1684 (int (*)(const void *)) felem_is_zero_int,
1685 (void (*)(void *, const void *)) felem_assign,
1686 (void (*)(void *, const void *)) felem_square_reduce,
1687 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1688 (void (*)(void *, const void *)) felem_inv,
1689 (void (*)(void *, const void *)) felem_contract);

/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
 * Result is stored in r (r can equal one of the inputs). */
int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
    const BIGNUM *scalar, size_t num, const EC_POINT *points[],
    const BIGNUM *scalars[], BN_CTX *ctx)
    {
    int ret = 0;
    int j;
    int mixed = 0;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;
    felem_bytearray *secrets = NULL;
    felem (*pre_comp)[17][3] = NULL;
    felem *tmp_felems = NULL;
    felem_bytearray tmp;
    unsigned i, num_bytes;
    int have_pre_comp = 0;
    size_t num_points = num;
    felem x_in, y_in, z_in, x_out, y_out, z_out;
    NISTP521_PRE_COMP *pre = NULL;
    felem (*g_pre_comp)[3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL) ||
        ((z = BN_CTX_get(ctx)) == NULL) ||
        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
        goto err;

    if (scalar != NULL)
        {
        pre = EC_EX_DATA_get_data(group->extra_data,
            nistp521_pre_comp_dup, nistp521_pre_comp_free,
            nistp521_pre_comp_clear_free);
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = &pre->g_pre_comp[0];
        else
            /* try to use the standard precomputation */
            g_pre_comp = (felem (*)[3]) gmul;
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation */
        if (!felem_to_BN(x, g_pre_comp[1][0]) ||
            !felem_to_BN(y, g_pre_comp[1][1]) ||
            !felem_to_BN(z, g_pre_comp[1][2]))
            {
            ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
            goto err;
            }
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
                generator, x, y, z, ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /* we don't have valid precomputation:
             * treat the generator as a random point */
            num_points++;
        }

    if (num_points > 0)
        {
        if (num_points >= 2)
            {
            /* unless we precompute multiples for just one point,
             * converting those into affine form is time well spent */
            mixed = 1;
            }
        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
        if (mixed)
            tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
        if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
            {
            ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
            goto err;
            }

        /* we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination */
        memset(secrets, 0, num_points * sizeof(felem_bytearray));
        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
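        /* pre_comp[i][j] will hold j*points[i] in Jacobian coordinates for
         * j = 1..16; index 0 is left as the point at infinity by the memset
         * above.  The tables are filled by the loop below and then consumed
         * by batch_mul. */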
        for (i = 0; i < num_points; ++i)
            {
            if (i == num)
                {
                /* we didn't have a valid precomputation, so we pick
                 * the generator */
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
                }
            else
                {
                /* the i^th point */
                p = points[i];
                p_scalar = scalars[i];
                }
            if ((p_scalar != NULL) && (p != NULL))
                {
                /* reduce scalar to 0 <= scalar < 2^521 */
                if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar)))
                    {
                    /* this is an unusual input, and we don't guarantee
                     * constant-timeness */
                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
                        {
                        ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
                        goto err;
                        }
                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
                    }
                else
                    num_bytes = BN_bn2bin(p_scalar, tmp);
                flip_endian(secrets[i], tmp, num_bytes);
                /* precompute multiples */
                if ((!BN_to_felem(x_out, &p->X)) ||
                    (!BN_to_felem(y_out, &p->Y)) ||
                    (!BN_to_felem(z_out, &p->Z))) goto err;
                memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
                memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
                memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
                for (j = 2; j <= 16; ++j)
                    {
                    if (j & 1)
                        {
                        point_add(
                            pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
                            pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
                            0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
                        }
                    else
                        {
                        point_double(
                            pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
                            pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
                        }
                    }
                }
            }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
        }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp))
        {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^521 */
        if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar)))
            {
            /* this is an unusual input, and we don't guarantee
             * constant-timeness */
            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
                {
                ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
                goto err;
                }
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
            }
        else
            num_bytes = BN_bn2bin(scalar, tmp);
        flip_endian(g_secret, tmp, num_bytes);
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
            (const felem_bytearray (*)) secrets, num_points,
            g_secret,
            mixed, (const felem (*)[17][3]) pre_comp,
            (const felem (*)[3]) g_pre_comp);
        }
    else
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
            (const felem_bytearray (*)) secrets, num_points,
            NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
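    /* At this point (x_out, y_out, z_out) holds, in Jacobian coordinates,
     * the full linear combination computed by batch_mul. */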
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
        (!felem_to_BN(z, z_in)))
        {
        ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
        goto err;
        }
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (secrets != NULL)
        OPENSSL_free(secrets);
    if (pre_comp != NULL)
        OPENSSL_free(pre_comp);
    if (tmp_felems != NULL)
        OPENSSL_free(tmp_felems);
    return ret;
    }
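
/*
 * Illustrative only (not part of the build): applications normally reach the
 * entry point above through the generic EC API rather than by calling it
 * directly.  On builds where NID_secp521r1 resolves to this implementation,
 * a scalar multiplication of the generator looks roughly like:
 *
 *     EC_GROUP *grp = EC_GROUP_new_by_curve_name(NID_secp521r1);
 *     EC_POINT *res = EC_POINT_new(grp);
 *     BN_CTX *bctx = BN_CTX_new();
 *     if (grp && res && bctx)
 *         EC_POINT_mul(grp, res, scalar, NULL, NULL, bctx);
 *
 * The names grp, res and bctx are placeholders for this sketch only.
 */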

int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
    {
    int ret = 0;
    NISTP521_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    felem tmp_felems[16];

    /* throw away old precomputation */
    EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
        nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
    BN_CTX_start(ctx);
    if (((x = BN_CTX_get(ctx)) == NULL) ||
        ((y = BN_CTX_get(ctx)) == NULL))
        goto err;
    /* get the generator */
    if (group->generator == NULL) goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    BN_bin2bn(nistp521_curve_params[3], sizeof (felem_bytearray), x);
    BN_bin2bn(nistp521_curve_params[4], sizeof (felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp521_pre_comp_new()) == NULL)
        goto err;
    /* if the generator is the standard one, use built-in precomputation */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
        {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        ret = 1;
        goto err;
        }
    if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
        (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
        (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
        goto err;
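    /* Build the generator table: for k = 0..15 with bit pattern (b3 b2 b1 b0),
     * g_pre_comp[k] will hold b0*G + b1*(2^130)*G + b2*(2^260)*G + b3*(2^390)*G,
     * i.e. every subset sum of {G, 2^130*G, 2^260*G, 2^390*G}. */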
    /* compute 2^130*G, 2^260*G, 2^390*G */
    for (i = 1; i <= 4; i <<= 1)
        {
        point_double(pre->g_pre_comp[2*i][0], pre->g_pre_comp[2*i][1],
            pre->g_pre_comp[2*i][2], pre->g_pre_comp[i][0],
            pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
        for (j = 0; j < 129; ++j)
            {
            point_double(pre->g_pre_comp[2*i][0],
                pre->g_pre_comp[2*i][1],
                pre->g_pre_comp[2*i][2],
                pre->g_pre_comp[2*i][0],
                pre->g_pre_comp[2*i][1],
                pre->g_pre_comp[2*i][2]);
            }
        }
    /* g_pre_comp[0] is the point at infinity */
    memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
    /* the remaining multiples */
    /* 2^130*G + 2^260*G */
    point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
        pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
        pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
        0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
        pre->g_pre_comp[2][2]);
    /* 2^130*G + 2^390*G */
    point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
        pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
        pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
        0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
        pre->g_pre_comp[2][2]);
    /* 2^260*G + 2^390*G */
    point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
        pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
        pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
        0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
        pre->g_pre_comp[4][2]);
    /* 2^130*G + 2^260*G + 2^390*G */
    point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
        pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
        pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
        0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
        pre->g_pre_comp[2][2]);
    for (i = 1; i < 8; ++i)
        {
        /* odd multiples: add G */
        point_add(pre->g_pre_comp[2*i+1][0], pre->g_pre_comp[2*i+1][1],
            pre->g_pre_comp[2*i+1][2], pre->g_pre_comp[2*i][0],
            pre->g_pre_comp[2*i][1], pre->g_pre_comp[2*i][2],
            0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
            pre->g_pre_comp[1][2]);
        }
    make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);

    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
        nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
        goto err;
    ret = 1;
    pre = NULL;
err:
    BN_CTX_end(ctx);
    if (generator != NULL)
        EC_POINT_free(generator);
    if (new_ctx != NULL)
        BN_CTX_free(new_ctx);
    if (pre)
        nistp521_pre_comp_free(pre);
    return ret;
    }
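
/* Illustrative only: callers typically trigger the precomputation above
 * through the generic API, e.g. EC_GROUP_precompute_mult(group, ctx), rather
 * than by calling ec_GFp_nistp521_precompute_mult directly. */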

int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
    {
    if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
            nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
        != NULL)
        return 1;
    else
        return 0;
    }

#else
static void *dummy = &dummy;
#endif