import less(1)
[unleashed/tickless.git] / usr / src / common / bignum / bignumimpl.c
blobd2fb14a18f343743766cd84f2bfa07fbc47bc221
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Configuration guide
28 * -------------------
30 * There are 4 preprocessor symbols used to configure the bignum
31 * implementation. This file contains no logic to configure based on
32 * processor; we leave that to the Makefiles to specify.
34 * USE_FLOATING_POINT
35 * Meaning: There is support for a fast floating-point implementation of
36 * Montgomery multiply.
38 * PSR_MUL
39 * Meaning: There are processor-specific versions of the low level
40 * functions to implement big_mul. Those functions are: big_mul_set_vec,
41 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support
42 * for all 4 functions. You cannot pick and choose which subset of these
43 * functions to support; that would lead to a rat's nest of #ifdefs.
45 * HWCAP
46 * Meaning: Call multiply support functions through a function pointer.
47 * On x86, there are multiple implementations for different hardware
48 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when
49 * a function is first used. So, the support functions are called through
50 * a function pointer. There is no need for that on Sparc, because there
51 * is only one implementation; support functions are called directly.
52 * Later, if there were some new VIS instruction, or something, and a
53 * run-time test were needed, rather than variant kernel modules and
54 * libraries, then HWCAP would be defined for Sparc, as well.
56 * UMUL64
57 * Meaning: It is safe to use generic C code that assumes the existence
58 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined,
59 * then the generic code for big_mul_add_vec() must necessarily be very slow,
60 * because it must fall back to using 16 x 16 --> 32 bit multiplication.
65 #include <sys/types.h>
66 #include "bignum.h"
68 #ifdef _KERNEL
69 #include <sys/ddi.h>
70 #include <sys/mdesc.h>
71 #include <sys/crypto/common.h>
73 #include <sys/kmem.h>
74 #include <sys/param.h>
75 #include <sys/sunddi.h>
77 #else
78 #include <stdlib.h>
79 #include <stdio.h>
80 #include <assert.h>
81 #define ASSERT assert
82 #endif /* _KERNEL */
84 #ifdef __amd64
85 #ifdef _KERNEL
86 #include <sys/x86_archext.h> /* cpuid_getvendor() */
87 #include <sys/cpuvar.h>
88 #else
89 #include <sys/auxv.h> /* getisax() */
90 #endif /* _KERNEL */
91 #endif /* __amd64 */
94 #ifdef _LP64 /* truncate 64-bit size_t to 32-bits */
95 #define UI32(ui) ((uint32_t)ui)
96 #else /* size_t already 32-bits */
97 #define UI32(ui) (ui)
98 #endif
101 #ifdef _KERNEL
102 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP)
103 #define big_free(ptr, size) kmem_free(ptr, size)
106 * big_realloc()
107 * Allocate memory of newsize bytes and copy oldsize bytes
108 * to the newly-allocated memory, then free the
109 * previously-allocated memory.
110 * Note: newsize must be > oldsize
112 void *
113 big_realloc(void *from, size_t oldsize, size_t newsize)
115 void *rv;
117 rv = kmem_alloc(newsize, KM_NOSLEEP);
118 if (rv != NULL)
119 bcopy(from, rv, oldsize);
120 kmem_free(from, oldsize);
121 return (rv);
124 #else /* _KERNEL */
126 #ifndef MALLOC_DEBUG
128 #define big_malloc(size) malloc(size)
129 #define big_free(ptr, size) free(ptr)
131 #else
133 void
134 big_free(void *ptr, size_t size)
136 printf("freed %d bytes at %p\n", size, ptr);
137 free(ptr);
140 void *
141 big_malloc(size_t size)
143 void *rv;
144 rv = malloc(size);
145 printf("malloced %d bytes, addr:%p\n", size, rv);
146 return (rv);
148 #endif /* MALLOC_DEBUG */
150 #define big_realloc(x, y, z) realloc((x), (z))
154 * printbignum()
155 * Print a BIGNUM type to stdout.
157 void
158 printbignum(char *aname, BIGNUM *a)
160 int i;
162 (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
163 for (i = a->len - 1; i >= 0; i--) {
164 #ifdef BIGNUM_CHUNK_32
165 (void) printf("%08x ", a->value[i]);
166 if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) {
167 (void) printf("\n");
169 #else
170 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
171 (uint32_t)((a->value[i]) & 0xffffffff));
172 if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */
173 (void) printf("\n");
175 #endif
177 (void) printf("\n");
180 #endif /* _KERNEL */
183 #ifdef __amd64
185 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
186 * Cache the result, as the CPU can't change.
188 * Note: the userland version uses getisax() and checks for an AMD-64-only
189 * feature. The kernel version uses cpuid_getvendor().
191 static int
192 bignum_on_intel(void)
194 static int cached_result = -1;
196 if (cached_result == -1) { /* first time */
197 #ifdef _KERNEL
198 cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
199 #else
200 uint_t ui;
202 (void) getisax(&ui, 1);
203 cached_result = ((ui & AV_386_AMD_MMX) == 0);
204 #endif /* _KERNEL */
207 return (cached_result);
209 #endif /* __amd64 */
213 * big_init()
214 * Initialize and allocate memory for a BIGNUM type.
216 * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
218 * Note: call big_finish() to free memory allocated by big_init().
220 * Input:
221 * number Uninitialized memory for BIGNUM
222 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
224 * Output:
225 * number Initialized BIGNUM
227 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
229 BIG_ERR_CODE
230 big_init(BIGNUM *number, int size)
232 number->value = big_malloc(BIGNUM_WORDSIZE * size);
233 if (number->value == NULL) {
234 return (BIG_NO_MEM);
236 number->size = size;
237 number->len = 0;
238 number->sign = 1;
239 number->malloced = 1;
240 return (BIG_OK);
245 * big_init1()
246 * Initialize and, if needed, allocate memory for a BIGNUM type.
247 * Use the buffer passed, buf, if any, instad of allocating memory
248 * if it's at least "size" bytes.
250 * Note: call big_finish() to free memory allocated by big_init().
252 * Input:
253 * number Uninitialized memory for BIGNUM
254 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
255 * buf Buffer for storing a BIGNUM.
256 * If NULL, big_init1() will allocate a buffer
257 * bufsize Size, in BIG_CHUNK_SIZE_bit words, of buf
259 * Output:
260 * number Initialized BIGNUM
262 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
264 BIG_ERR_CODE
265 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
267 if ((buf == NULL) || (size > bufsize)) {
268 number->value = big_malloc(BIGNUM_WORDSIZE * size);
269 if (number->value == NULL) {
270 return (BIG_NO_MEM);
272 number->size = size;
273 number->malloced = 1;
274 } else {
275 number->value = buf;
276 number->size = bufsize;
277 number->malloced = 0;
279 number->len = 0;
280 number->sign = 1;
282 return (BIG_OK);
287 * big_finish()
288 * Free memory, if any, allocated by big_init() or big_init1().
290 void
291 big_finish(BIGNUM *number)
293 if (number->malloced == 1) {
294 big_free(number->value, BIGNUM_WORDSIZE * number->size);
295 number->malloced = 0;
301 * bn->size should be at least
302 * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE bytes
303 * converts from byte-big-endian format to bignum format (words in
304 * little endian order, but bytes within the words big endian)
306 void
307 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
309 int i, j;
310 uint32_t offs;
311 const uint32_t slen = UI32(len);
312 BIG_CHUNK_TYPE word;
313 uchar_t *knwordp;
315 if (slen == 0) {
316 bn->len = 1;
317 bn->value[0] = 0;
318 return;
321 offs = slen % BIGNUM_WORDSIZE;
322 bn->len = slen / BIGNUM_WORDSIZE;
324 for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
325 knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);
326 word = knwordp[0];
327 for (j = 1; j < BIGNUM_WORDSIZE; j++) {
328 word = (word << BITSINBYTE) + knwordp[j];
330 bn->value[i] = word;
332 if (offs > 0) {
333 word = kn[0];
334 for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
335 bn->value[bn->len++] = word;
337 while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
338 bn->len --;
344 * copies the least significant len bytes if
345 * len < bn->len * BIGNUM_WORDSIZE
346 * converts from bignum format to byte-big-endian format.
347 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
349 void
350 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
352 int i, j;
353 uint32_t offs;
354 const uint32_t slen = UI32(len);
355 BIG_CHUNK_TYPE word;
357 if (len < BIGNUM_WORDSIZE * bn->len) {
358 for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
359 word = bn->value[i];
360 for (j = 0; j < BIGNUM_WORDSIZE; j++) {
361 kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
362 word & 0xff;
363 word = word >> BITSINBYTE;
366 offs = slen % BIGNUM_WORDSIZE;
367 if (offs > 0) {
368 word = bn->value[slen / BIGNUM_WORDSIZE];
369 for (i = slen % BIGNUM_WORDSIZE; i > 0; i --) {
370 kn[i - 1] = word & 0xff;
371 word = word >> BITSINBYTE;
374 } else {
375 for (i = 0; i < bn->len; i++) {
376 word = bn->value[i];
377 for (j = 0; j < BIGNUM_WORDSIZE; j++) {
378 kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
379 word & 0xff;
380 word = word >> BITSINBYTE;
383 for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) {
384 kn[i] = 0;
391 big_bitlength(BIGNUM *a)
393 int l = 0, b = 0;
394 BIG_CHUNK_TYPE c;
396 l = a->len - 1;
397 while ((l > 0) && (a->value[l] == 0)) {
398 l--;
400 b = BIG_CHUNK_SIZE;
401 c = a->value[l];
402 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
403 c = c << 1;
404 b--;
407 return (l * BIG_CHUNK_SIZE + b);
412 * big_copy()
413 * Copy BIGNUM src to dest, allocating memory if needed.
415 BIG_ERR_CODE
416 big_copy(BIGNUM *dest, BIGNUM *src)
418 BIG_CHUNK_TYPE *newptr;
419 int i, len;
421 len = src->len;
422 while ((len > 1) && (src->value[len - 1] == 0)) {
423 len--;
425 src->len = len;
426 if (dest->size < len) {
427 if (dest->malloced == 1) {
428 newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
429 BIGNUM_WORDSIZE * dest->size,
430 BIGNUM_WORDSIZE * len);
431 } else {
432 newptr = (BIG_CHUNK_TYPE *)
433 big_malloc(BIGNUM_WORDSIZE * len);
434 if (newptr != NULL) {
435 dest->malloced = 1;
438 if (newptr == NULL) {
439 return (BIG_NO_MEM);
441 dest->value = newptr;
442 dest->size = len;
444 dest->len = len;
445 dest->sign = src->sign;
446 for (i = 0; i < len; i++) {
447 dest->value[i] = src->value[i];
450 return (BIG_OK);
455 * big_extend()
456 * Allocate memory to extend BIGNUM number to size bignum chunks,
457 * if not at least that size already.
459 BIG_ERR_CODE
460 big_extend(BIGNUM *number, int size)
462 BIG_CHUNK_TYPE *newptr;
463 int i;
465 if (number->size >= size)
466 return (BIG_OK);
467 if (number->malloced) {
468 number->value = big_realloc(number->value,
469 BIGNUM_WORDSIZE * number->size,
470 BIGNUM_WORDSIZE * size);
471 } else {
472 newptr = big_malloc(BIGNUM_WORDSIZE * size);
473 if (newptr != NULL) {
474 for (i = 0; i < number->size; i++) {
475 newptr[i] = number->value[i];
478 number->value = newptr;
481 if (number->value == NULL) {
482 return (BIG_NO_MEM);
485 number->size = size;
486 number->malloced = 1;
487 return (BIG_OK);
491 /* returns 1 if n == 0 */
493 big_is_zero(BIGNUM *n)
495 int i, result;
497 result = 1;
498 for (i = 0; i < n->len; i++) {
499 if (n->value[i] != 0) {
500 result = 0;
503 return (result);
507 BIG_ERR_CODE
508 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
510 int i, shorter, longer;
511 BIG_CHUNK_TYPE cy, ai;
512 BIG_CHUNK_TYPE *r, *a, *b, *c;
513 BIG_ERR_CODE err;
514 BIGNUM *longerarg;
516 if (aa->len > bb->len) {
517 shorter = bb->len;
518 longer = aa->len;
519 longerarg = aa;
520 } else {
521 shorter = aa->len;
522 longer = bb->len;
523 longerarg = bb;
525 if (result->size < longer + 1) {
526 err = big_extend(result, longer + 1);
527 if (err != BIG_OK) {
528 return (err);
532 r = result->value;
533 a = aa->value;
534 b = bb->value;
535 c = longerarg->value;
536 cy = 0;
537 for (i = 0; i < shorter; i++) {
538 ai = a[i];
539 r[i] = ai + b[i] + cy;
540 if (r[i] > ai) {
541 cy = 0;
542 } else if (r[i] < ai) {
543 cy = 1;
546 for (; i < longer; i++) {
547 ai = c[i];
548 r[i] = ai + cy;
549 if (r[i] >= ai) {
550 cy = 0;
553 if (cy == 1) {
554 r[i] = cy;
555 result->len = longer + 1;
556 } else {
557 result->len = longer;
559 result->sign = 1;
560 return (BIG_OK);
564 /* caller must make sure that result has at least len words allocated */
565 void
566 big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
568 int i;
569 BIG_CHUNK_TYPE cy, ai;
571 cy = 1;
572 for (i = 0; i < len; i++) {
573 ai = a[i];
574 r[i] = ai + (~b[i]) + cy;
575 if (r[i] > ai) {
576 cy = 0;
577 } else if (r[i] < ai) {
578 cy = 1;
584 /* result=aa-bb it is assumed that aa>=bb */
585 BIG_ERR_CODE
586 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
588 int i, shorter;
589 BIG_CHUNK_TYPE cy = 1, ai;
590 BIG_CHUNK_TYPE *r, *a, *b;
591 BIG_ERR_CODE err = BIG_OK;
593 if (aa->len > bb->len) {
594 shorter = bb->len;
595 } else {
596 shorter = aa->len;
598 if (result->size < aa->len) {
599 err = big_extend(result, aa->len);
600 if (err != BIG_OK) {
601 return (err);
605 r = result->value;
606 a = aa->value;
607 b = bb->value;
608 result->len = aa->len;
609 cy = 1;
610 for (i = 0; i < shorter; i++) {
611 ai = a[i];
612 r[i] = ai + (~b[i]) + cy;
613 if (r[i] > ai) {
614 cy = 0;
615 } else if (r[i] < ai) {
616 cy = 1;
619 for (; i < aa->len; i++) {
620 ai = a[i];
621 r[i] = ai + (~0) + cy;
622 if (r[i] < ai) {
623 cy = 1;
626 result->sign = 1;
628 if (cy == 0) {
629 return (BIG_INVALID_ARGS);
630 } else {
631 return (BIG_OK);
636 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
638 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
640 int i;
642 if (aa->len > bb->len) {
643 for (i = aa->len - 1; i > bb->len - 1; i--) {
644 if (aa->value[i] > 0) {
645 return (1);
648 } else if (aa->len < bb->len) {
649 for (i = bb->len - 1; i > aa->len - 1; i--) {
650 if (bb->value[i] > 0) {
651 return (-1);
654 } else {
655 i = aa->len - 1;
657 for (; i >= 0; i--) {
658 if (aa->value[i] > bb->value[i]) {
659 return (1);
660 } else if (aa->value[i] < bb->value[i]) {
661 return (-1);
665 return (0);
669 BIG_ERR_CODE
670 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
672 BIG_ERR_CODE err;
674 if ((bb->sign == -1) && (aa->sign == 1)) {
675 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
676 return (err);
678 result->sign = 1;
679 } else if ((aa->sign == -1) && (bb->sign == 1)) {
680 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
681 return (err);
683 result->sign = -1;
684 } else if ((aa->sign == 1) && (bb->sign == 1)) {
685 if (big_cmp_abs(aa, bb) >= 0) {
686 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
687 return (err);
689 result->sign = 1;
690 } else {
691 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
692 return (err);
694 result->sign = -1;
696 } else {
697 if (big_cmp_abs(aa, bb) >= 0) {
698 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
699 return (err);
701 result->sign = -1;
702 } else {
703 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
704 return (err);
706 result->sign = 1;
709 return (BIG_OK);
713 BIG_ERR_CODE
714 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
716 BIG_ERR_CODE err;
718 if ((bb->sign == -1) && (aa->sign == -1)) {
719 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
720 return (err);
722 result->sign = -1;
723 } else if ((aa->sign == 1) && (bb->sign == 1)) {
724 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
725 return (err);
727 result->sign = 1;
728 } else if ((aa->sign == 1) && (bb->sign == -1)) {
729 if (big_cmp_abs(aa, bb) >= 0) {
730 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
731 return (err);
733 result->sign = 1;
734 } else {
735 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
736 return (err);
738 result->sign = -1;
740 } else {
741 if (big_cmp_abs(aa, bb) >= 0) {
742 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
743 return (err);
745 result->sign = -1;
746 } else {
747 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
748 return (err);
750 result->sign = 1;
753 return (BIG_OK);
757 /* result = aa/2 */
758 BIG_ERR_CODE
759 big_half_pos(BIGNUM *result, BIGNUM *aa)
761 BIG_ERR_CODE err;
762 int i;
763 BIG_CHUNK_TYPE cy, cy1;
764 BIG_CHUNK_TYPE *a, *r;
766 if (result->size < aa->len) {
767 err = big_extend(result, aa->len);
768 if (err != BIG_OK) {
769 return (err);
773 result->len = aa->len;
774 a = aa->value;
775 r = result->value;
776 cy = 0;
777 for (i = aa->len - 1; i >= 0; i--) {
778 cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
779 r[i] = (cy | (a[i] >> 1));
780 cy = cy1;
782 if (r[result->len - 1] == 0) {
783 result->len--;
786 return (BIG_OK);
789 /* result = aa*2 */
790 BIG_ERR_CODE
791 big_double(BIGNUM *result, BIGNUM *aa)
793 BIG_ERR_CODE err;
794 int i, rsize;
795 BIG_CHUNK_TYPE cy, cy1;
796 BIG_CHUNK_TYPE *a, *r;
798 if ((aa->len > 0) &&
799 ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
800 rsize = aa->len + 1;
801 } else {
802 rsize = aa->len;
805 if (result->size < rsize) {
806 err = big_extend(result, rsize);
807 if (err != BIG_OK)
808 return (err);
811 a = aa->value;
812 r = result->value;
813 if (rsize == aa->len + 1) {
814 r[rsize - 1] = 1;
816 cy = 0;
817 for (i = 0; i < aa->len; i++) {
818 cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
819 r[i] = (cy | (a[i] << 1));
820 cy = cy1;
822 result->len = rsize;
823 return (BIG_OK);
828 * returns aa mod b, aa must be nonneg, b must be a max
829 * (BIG_CHUNK_SIZE / 2)-bit integer
831 static uint32_t
832 big_modhalf_pos(BIGNUM *aa, uint32_t b)
834 int i;
835 BIG_CHUNK_TYPE rem;
837 if (aa->len == 0) {
838 return (0);
840 rem = aa->value[aa->len - 1] % b;
841 for (i = aa->len - 2; i >= 0; i--) {
842 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
843 (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
844 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
845 (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
848 return ((uint32_t)rem);
853 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
854 * result->size should be at least aa->len at entry
855 * aa, bb, and result should be positive
857 void
858 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
860 int i, lendiff;
861 BIGNUM res1, aa1;
863 lendiff = aa->len - bb->len;
864 res1.size = result->size - lendiff;
865 res1.malloced = 0;
866 res1.value = result->value + lendiff;
867 aa1.size = aa->size - lendiff;
868 aa1.value = aa->value + lendiff;
869 aa1.len = bb->len;
870 aa1.sign = 1;
871 (void) big_sub_pos(&res1, &aa1, bb);
872 if (result->value != aa->value) {
873 for (i = 0; i < lendiff; i++) {
874 result->value[i] = aa->value[i];
877 result->len = aa->len;
882 * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
883 * (2^BIG_CHUNK_SIZE)^lendiff * |bb|
884 * aa->len should be >= bb->len
887 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
889 int lendiff;
890 BIGNUM aa1;
892 lendiff = aa->len - bb->len;
893 aa1.len = bb->len;
894 aa1.size = aa->size - lendiff;
895 aa1.malloced = 0;
896 aa1.value = aa->value + lendiff;
897 return (big_cmp_abs(&aa1, bb));
902 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
903 * result should have enough space allocated.
905 static void
906 big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
908 int i;
909 BIG_CHUNK_TYPE t1, t2, ai, cy;
910 BIG_CHUNK_TYPE *a, *r;
912 a = aa->value;
913 r = result->value;
914 cy = 0;
915 for (i = 0; i < aa->len; i++) {
916 ai = a[i];
917 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
918 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
919 (t1 >> (BIG_CHUNK_SIZE / 2));
920 r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
921 (t2 << (BIG_CHUNK_SIZE / 2));
922 cy = t2 >> (BIG_CHUNK_SIZE / 2);
924 r[i] = cy;
925 result->len = aa->len + 1;
926 result->sign = aa->sign;
931 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
932 * (BIG_CHUNK_SIZE / 2)-bit positive integer.
933 * result should have enough space allocated.
935 static void
936 big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
938 int i;
939 BIG_CHUNK_TYPE t1, t2, ai, cy, ri;
940 BIG_CHUNK_TYPE *a, *r;
942 a = aa->value;
943 r = result->value;
944 cy = 0;
945 ri = 0;
946 for (i = 0; i < aa->len; i++) {
947 ai = a[i];
948 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
949 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
950 (t1 >> (BIG_CHUNK_SIZE / 2));
951 r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri;
952 ri = t2 & BIG_CHUNK_LOWHALFBITS;
953 cy = t2 >> (BIG_CHUNK_SIZE / 2);
955 r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri;
956 result->len = aa->len + 1;
957 result->sign = aa->sign;
961 /* it is assumed that result->size is big enough */
962 void
963 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
965 int i;
966 BIG_CHUNK_TYPE cy, ai;
968 if (offs == 0) {
969 if (result != aa) {
970 (void) big_copy(result, aa);
972 return;
974 cy = 0;
975 for (i = 0; i < aa->len; i++) {
976 ai = aa->value[i];
977 result->value[i] = (ai << offs) | cy;
978 cy = ai >> (BIG_CHUNK_SIZE - offs);
980 if (cy != 0) {
981 result->len = aa->len + 1;
982 result->value[result->len - 1] = cy;
983 } else {
984 result->len = aa->len;
986 result->sign = aa->sign;
990 /* it is assumed that result->size is big enough */
991 void
992 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
994 int i;
995 BIG_CHUNK_TYPE cy, ai;
997 if (offs == 0) {
998 if (result != aa) {
999 (void) big_copy(result, aa);
1001 return;
1003 cy = aa->value[0] >> offs;
1004 for (i = 1; i < aa->len; i++) {
1005 ai = aa->value[i];
1006 result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
1007 cy = ai >> offs;
1009 result->len = aa->len;
1010 result->value[result->len - 1] = cy;
1011 result->sign = aa->sign;
1016 * result = aa/bb remainder = aa mod bb
1017 * it is assumed that aa and bb are positive
1019 BIG_ERR_CODE
1020 big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
1022 BIG_ERR_CODE err = BIG_OK;
1023 int i, alen, blen, tlen, rlen, offs;
1024 BIG_CHUNK_TYPE higha, highb, coeff;
1025 BIG_CHUNK_TYPE *a, *b;
1026 BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
1027 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1028 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
1029 BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE];
1030 BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE];
1031 BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE];
1033 a = aa->value;
1034 b = bb->value;
1035 alen = aa->len;
1036 blen = bb->len;
1037 while ((alen > 1) && (a[alen - 1] == 0)) {
1038 alen = alen - 1;
1040 aa->len = alen;
1041 while ((blen > 1) && (b[blen - 1] == 0)) {
1042 blen = blen - 1;
1044 bb->len = blen;
1045 if ((blen == 1) && (b[0] == 0)) {
1046 return (BIG_DIV_BY_0);
1049 if (big_cmp_abs(aa, bb) < 0) {
1050 if ((remainder != NULL) &&
1051 ((err = big_copy(remainder, aa)) != BIG_OK)) {
1052 return (err);
1054 if (result != NULL) {
1055 result->len = 1;
1056 result->sign = 1;
1057 result->value[0] = 0;
1059 return (BIG_OK);
1062 if ((err = big_init1(&bblow, blen + 1,
1063 bblowvalue, arraysize(bblowvalue))) != BIG_OK)
1064 return (err);
1066 if ((err = big_init1(&bbhigh, blen + 1,
1067 bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
1068 goto ret1;
1070 if ((err = big_init1(&tmp1, alen + 2,
1071 tmp1value, arraysize(tmp1value))) != BIG_OK)
1072 goto ret2;
1074 if ((err = big_init1(&tmp2, blen + 2,
1075 tmp2value, arraysize(tmp2value))) != BIG_OK)
1076 goto ret3;
1078 if ((err = big_init1(&tresult, alen - blen + 2,
1079 tresultvalue, arraysize(tresultvalue))) != BIG_OK)
1080 goto ret4;
1082 offs = 0;
1083 highb = b[blen - 1];
1084 if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) {
1085 highb = highb >> (BIG_CHUNK_SIZE / 2);
1086 offs = (BIG_CHUNK_SIZE / 2);
1088 while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) {
1089 highb = highb << 1;
1090 offs++;
1093 big_shiftleft(&bblow, bb, offs);
1095 if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) {
1096 big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1097 } else {
1098 big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1100 if (bbhigh.value[bbhigh.len - 1] == 0) {
1101 bbhigh.len--;
1102 } else {
1103 bbhigh.value[bbhigh.len] = 0;
1106 highb = bblow.value[bblow.len - 1];
1108 big_shiftleft(&tmp1, aa, offs);
1109 rlen = tmp1.len - bblow.len + 1;
1110 tresult.len = rlen;
1112 tmp1.len++;
1113 tlen = tmp1.len;
1114 tmp1.value[tmp1.len - 1] = 0;
1115 for (i = 0; i < rlen; i++) {
1116 higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) +
1117 (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2));
1118 coeff = higha / (highb + 1);
1119 big_mulhalf_high(&tmp2, &bblow, coeff);
1120 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1121 bbhigh.len++;
1122 while (tmp1.value[tlen - 1] > 0) {
1123 big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1124 coeff++;
1126 bbhigh.len--;
1127 tlen--;
1128 tmp1.len--;
1129 while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
1130 big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1131 coeff++;
1133 tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2);
1134 higha = tmp1.value[tlen - 1];
1135 coeff = higha / (highb + 1);
1136 big_mulhalf_low(&tmp2, &bblow, coeff);
1137 tmp2.len--;
1138 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1139 while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
1140 big_sub_pos_high(&tmp1, &tmp1, &bblow);
1141 coeff++;
1143 tresult.value[rlen - i - 1] =
1144 tresult.value[rlen - i - 1] + coeff;
1147 big_shiftright(&tmp1, &tmp1, offs);
1149 err = BIG_OK;
1151 if ((remainder != NULL) &&
1152 ((err = big_copy(remainder, &tmp1)) != BIG_OK))
1153 goto ret;
1155 if (result != NULL)
1156 err = big_copy(result, &tresult);
1158 ret:
1159 big_finish(&tresult);
1160 ret4:
1161 big_finish(&tmp1);
1162 ret3:
1163 big_finish(&tmp2);
1164 ret2:
1165 big_finish(&bbhigh);
1166 ret1:
1167 big_finish(&bblow);
1168 return (err);
1173 * If there is no processor-specific integer implementation of
1174 * the lower level multiply functions, then this code is provided
1175 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1176 * big_sqr_vec().
1178 * There are two generic implementations. One that assumes that
1179 * there is hardware and C compiler support for a 32 x 32 --> 64
1180 * bit unsigned multiply, but otherwise is not specific to any
1181 * processor, platform, or ISA.
1183 * The other makes very few assumptions about hardware capabilities.
1184 * It does not even assume that there is any implementation of a
1185 * 32 x 32 --> 64 bit multiply that is accessible to C code and
1186 * appropriate to use. It falls constructs 32 x 32 --> 64 bit
1187 * multiplies from 16 x 16 --> 32 bit multiplies.
1191 #if !defined(PSR_MUL)
1193 #ifdef UMUL64
1195 #if (BIG_CHUNK_SIZE == 32)
1197 #define UNROLL8
1199 #define MUL_SET_VEC_ROUND_PREFETCH(R) \
1200 p = pf * d; \
1201 pf = (uint64_t)a[R + 1]; \
1202 t = p + cy; \
1203 r[R] = (uint32_t)t; \
1204 cy = t >> 32
1206 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1207 p = pf * d; \
1208 t = p + cy; \
1209 r[R] = (uint32_t)t; \
1210 cy = t >> 32
1212 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \
1213 t = (uint64_t)r[R]; \
1214 p = pf * d; \
1215 pf = (uint64_t)a[R + 1]; \
1216 t = p + t + cy; \
1217 r[R] = (uint32_t)t; \
1218 cy = t >> 32
1220 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1221 t = (uint64_t)r[R]; \
1222 p = pf * d; \
1223 t = p + t + cy; \
1224 r[R] = (uint32_t)t; \
1225 cy = t >> 32
1227 #ifdef UNROLL8
1229 #define UNROLL 8
1232 * r = a * b
1233 * where r and a are vectors; b is a single 32-bit digit
1236 uint32_t
1237 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1239 uint64_t d, pf, p, t, cy;
1241 if (len == 0)
1242 return (0);
1243 cy = 0;
1244 d = (uint64_t)b;
1245 pf = (uint64_t)a[0];
1246 while (len > UNROLL) {
1247 MUL_SET_VEC_ROUND_PREFETCH(0);
1248 MUL_SET_VEC_ROUND_PREFETCH(1);
1249 MUL_SET_VEC_ROUND_PREFETCH(2);
1250 MUL_SET_VEC_ROUND_PREFETCH(3);
1251 MUL_SET_VEC_ROUND_PREFETCH(4);
1252 MUL_SET_VEC_ROUND_PREFETCH(5);
1253 MUL_SET_VEC_ROUND_PREFETCH(6);
1254 MUL_SET_VEC_ROUND_PREFETCH(7);
1255 r += UNROLL;
1256 a += UNROLL;
1257 len -= UNROLL;
1259 if (len == UNROLL) {
1260 MUL_SET_VEC_ROUND_PREFETCH(0);
1261 MUL_SET_VEC_ROUND_PREFETCH(1);
1262 MUL_SET_VEC_ROUND_PREFETCH(2);
1263 MUL_SET_VEC_ROUND_PREFETCH(3);
1264 MUL_SET_VEC_ROUND_PREFETCH(4);
1265 MUL_SET_VEC_ROUND_PREFETCH(5);
1266 MUL_SET_VEC_ROUND_PREFETCH(6);
1267 MUL_SET_VEC_ROUND_NOPREFETCH(7);
1268 return ((uint32_t)cy);
1270 while (len > 1) {
1271 MUL_SET_VEC_ROUND_PREFETCH(0);
1272 ++r;
1273 ++a;
1274 --len;
1276 if (len > 0) {
1277 MUL_SET_VEC_ROUND_NOPREFETCH(0);
1279 return ((uint32_t)cy);
1283 * r += a * b
1284 * where r and a are vectors; b is a single 32-bit digit
1287 uint32_t
1288 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1290 uint64_t d, pf, p, t, cy;
1292 if (len == 0)
1293 return (0);
1294 cy = 0;
1295 d = (uint64_t)b;
1296 pf = (uint64_t)a[0];
1297 while (len > 8) {
1298 MUL_ADD_VEC_ROUND_PREFETCH(0);
1299 MUL_ADD_VEC_ROUND_PREFETCH(1);
1300 MUL_ADD_VEC_ROUND_PREFETCH(2);
1301 MUL_ADD_VEC_ROUND_PREFETCH(3);
1302 MUL_ADD_VEC_ROUND_PREFETCH(4);
1303 MUL_ADD_VEC_ROUND_PREFETCH(5);
1304 MUL_ADD_VEC_ROUND_PREFETCH(6);
1305 MUL_ADD_VEC_ROUND_PREFETCH(7);
1306 r += 8;
1307 a += 8;
1308 len -= 8;
1310 if (len == 8) {
1311 MUL_ADD_VEC_ROUND_PREFETCH(0);
1312 MUL_ADD_VEC_ROUND_PREFETCH(1);
1313 MUL_ADD_VEC_ROUND_PREFETCH(2);
1314 MUL_ADD_VEC_ROUND_PREFETCH(3);
1315 MUL_ADD_VEC_ROUND_PREFETCH(4);
1316 MUL_ADD_VEC_ROUND_PREFETCH(5);
1317 MUL_ADD_VEC_ROUND_PREFETCH(6);
1318 MUL_ADD_VEC_ROUND_NOPREFETCH(7);
1319 return ((uint32_t)cy);
1321 while (len > 1) {
1322 MUL_ADD_VEC_ROUND_PREFETCH(0);
1323 ++r;
1324 ++a;
1325 --len;
1327 if (len > 0) {
1328 MUL_ADD_VEC_ROUND_NOPREFETCH(0);
1330 return ((uint32_t)cy);
1332 #endif /* UNROLL8 */
1334 void
1335 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1337 uint32_t *tr, *ta;
1338 int tlen, row, col;
1339 uint64_t p, s, t, t2, cy;
1340 uint32_t d;
1342 tr = r + 1;
1343 ta = a;
1344 tlen = len - 1;
1345 tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
1346 while (--tlen > 0) {
1347 tr += 2;
1348 ++ta;
1349 tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
1351 s = (uint64_t)a[0];
1352 s = s * s;
1353 r[0] = (uint32_t)s;
1354 cy = s >> 32;
1355 p = ((uint64_t)r[1] << 1) + cy;
1356 r[1] = (uint32_t)p;
1357 cy = p >> 32;
1358 row = 1;
1359 col = 2;
1360 while (row < len) {
1361 s = (uint64_t)a[row];
1362 s = s * s;
1363 p = (uint64_t)r[col] << 1;
1364 t = p + s;
1365 d = (uint32_t)t;
1366 t2 = (uint64_t)d + cy;
1367 r[col] = (uint32_t)t2;
1368 cy = (t >> 32) + (t2 >> 32);
1369 if (row == len - 1)
1370 break;
1371 p = ((uint64_t)r[col + 1] << 1) + cy;
1372 r[col + 1] = (uint32_t)p;
1373 cy = p >> 32;
1374 ++row;
1375 col += 2;
1377 r[col + 1] = (uint32_t)cy;
1380 #else /* BIG_CHUNK_SIZE == 64 */
1383 * r = r + a * digit, r and a are vectors of length len
1384 * returns the carry digit
1386 BIG_CHUNK_TYPE
1387 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1388 BIG_CHUNK_TYPE digit)
1390 BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh;
1391 int i;
1393 cy1 = 0;
1394 dlow = digit & BIG_CHUNK_LOWHALFBITS;
1395 dhigh = digit >> (BIG_CHUNK_SIZE / 2);
1396 for (i = 0; i < len; i++) {
1397 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1398 dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1399 (r[i] & BIG_CHUNK_LOWHALFBITS);
1400 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1401 dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1402 (r[i] >> (BIG_CHUNK_SIZE / 2));
1403 r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
1404 (cy1 << (BIG_CHUNK_SIZE / 2));
1406 retcy = cy1 >> (BIG_CHUNK_SIZE / 2);
1408 cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
1409 for (i = 0; i < len - 1; i++) {
1410 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1411 dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1412 (r[i] >> (BIG_CHUNK_SIZE / 2));
1413 r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1414 (cy << (BIG_CHUNK_SIZE / 2));
1415 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1416 dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1417 (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
1419 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1420 dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
1421 (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
1422 r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1423 (cy << (BIG_CHUNK_SIZE / 2));
1424 retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
1425 dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;
1427 return (retcy);
1432 * r = a * digit, r and a are vectors of length len
1433 * returns the carry digit
1435 BIG_CHUNK_TYPE
1436 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1437 BIG_CHUNK_TYPE digit)
1439 int i;
1441 ASSERT(r != a);
1442 for (i = 0; i < len; i++) {
1443 r[i] = 0;
1445 return (big_mul_add_vec(r, a, len, digit));
1448 void
1449 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1451 int i;
1453 ASSERT(r != a);
1454 r[len] = big_mul_set_vec(r, a, len, a[0]);
1455 for (i = 1; i < len; ++i)
1456 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1459 #endif /* BIG_CHUNK_SIZE == 32/64 */
1462 #else /* ! UMUL64 */
1464 #if (BIG_CHUNK_SIZE != 32)
1465 #error "Don't use 64-bit chunks without defining UMUL64"
1466 #endif
1470 * r = r + a * digit, r and a are vectors of length len
1471 * returns the carry digit
1473 uint32_t
1474 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1476 uint32_t cy, cy1, retcy, dlow, dhigh;
1477 int i;
1479 cy1 = 0;
1480 dlow = digit & 0xffff;
1481 dhigh = digit >> 16;
1482 for (i = 0; i < len; i++) {
1483 cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
1484 cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
1485 r[i] = (cy & 0xffff) | (cy1 << 16);
1487 retcy = cy1 >> 16;
1489 cy1 = r[0] & 0xffff;
1490 for (i = 0; i < len - 1; i++) {
1491 cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
1492 r[i] = (cy1 & 0xffff) | (cy << 16);
1493 cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
1495 cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
1496 r[len - 1] = (cy1 & 0xffff) | (cy << 16);
1497 retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;
1499 return (retcy);
1504 * r = a * digit, r and a are vectors of length len
1505 * returns the carry digit
1507 uint32_t
1508 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1510 int i;
1512 ASSERT(r != a);
1513 for (i = 0; i < len; i++) {
1514 r[i] = 0;
1517 return (big_mul_add_vec(r, a, len, digit));
1520 void
1521 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1523 int i;
1525 ASSERT(r != a);
1526 r[len] = big_mul_set_vec(r, a, len, a[0]);
1527 for (i = 1; i < len; ++i)
1528 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1531 #endif /* UMUL64 */
1533 void
1534 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1535 BIG_CHUNK_TYPE *b, int blen)
1537 int i;
1539 r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1540 for (i = 1; i < blen; ++i)
1541 r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]);
1545 #endif /* ! PSR_MUL */
1549 * result = aa * bb result->value should be big enough to hold the result
1551 * Implementation: Standard grammar school algorithm
1554 BIG_ERR_CODE
1555 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1557 BIGNUM tmp1;
1558 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1559 BIG_CHUNK_TYPE *r, *t, *a, *b;
1560 BIG_ERR_CODE err;
1561 int i, alen, blen, rsize, sign, diff;
1563 if (aa == bb) {
1564 diff = 0;
1565 } else {
1566 diff = big_cmp_abs(aa, bb);
1567 if (diff < 0) {
1568 BIGNUM *tt;
1569 tt = aa;
1570 aa = bb;
1571 bb = tt;
1574 a = aa->value;
1575 b = bb->value;
1576 alen = aa->len;
1577 blen = bb->len;
1578 while ((alen > 1) && (a[alen - 1] == 0)) {
1579 alen--;
1581 aa->len = alen;
1582 while ((blen > 1) && (b[blen - 1] == 0)) {
1583 blen--;
1585 bb->len = blen;
1587 rsize = alen + blen;
1588 ASSERT(rsize > 0);
1589 if (result->size < rsize) {
1590 err = big_extend(result, rsize);
1591 if (err != BIG_OK) {
1592 return (err);
1594 /* aa or bb might be an alias to result */
1595 a = aa->value;
1596 b = bb->value;
1598 r = result->value;
1600 if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1601 result->len = 1;
1602 result->sign = 1;
1603 r[0] = 0;
1604 return (BIG_OK);
1606 sign = aa->sign * bb->sign;
1607 if ((alen == 1) && (a[0] == 1)) {
1608 for (i = 0; i < blen; i++) {
1609 r[i] = b[i];
1611 result->len = blen;
1612 result->sign = sign;
1613 return (BIG_OK);
1615 if ((blen == 1) && (b[0] == 1)) {
1616 for (i = 0; i < alen; i++) {
1617 r[i] = a[i];
1619 result->len = alen;
1620 result->sign = sign;
1621 return (BIG_OK);
1624 if ((err = big_init1(&tmp1, rsize,
1625 tmp1value, arraysize(tmp1value))) != BIG_OK) {
1626 return (err);
1628 t = tmp1.value;
1630 for (i = 0; i < rsize; i++) {
1631 t[i] = 0;
1634 if (diff == 0 && alen > 2) {
1635 BIG_SQR_VEC(t, a, alen);
1636 } else if (blen > 0) {
1637 BIG_MUL_VEC(t, a, alen, b, blen);
1640 if (t[rsize - 1] == 0) {
1641 tmp1.len = rsize - 1;
1642 } else {
1643 tmp1.len = rsize;
1646 err = big_copy(result, &tmp1);
1648 result->sign = sign;
1650 big_finish(&tmp1);
1652 return (err);
1657 * big_mont_mul()
1658 * Montgomery multiplication.
1660 * Caller must ensure that a < n, b < n, ret->size >= 2 * n->len + 1,
1661 * and that ret is not n.
1663 BIG_ERR_CODE
1664 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
1666 int i, j, nlen, needsubtract;
1667 BIG_CHUNK_TYPE *nn, *rr, *rrplusi;
1668 BIG_CHUNK_TYPE digit, c;
1669 BIG_ERR_CODE err;
1670 #ifdef __amd64
1671 #define BIG_CPU_UNKNOWN 0
1672 #define BIG_CPU_AMD 1
1673 #define BIG_CPU_INTEL 2
1674 static int big_cpu = BIG_CPU_UNKNOWN;
1675 BIG_CHUNK_TYPE carry[BIGTMPSIZE];
1677 if (big_cpu == BIG_CPU_UNKNOWN) {
1678 big_cpu = 1 + bignum_on_intel();
1680 #endif /* __amd64 */
1682 nlen = n->len;
1683 nn = n->value;
1685 rr = ret->value;
1687 if ((err = big_mul(ret, a, b)) != BIG_OK) {
1688 return (err);
1691 rr = ret->value;
1692 for (i = ret->len; i < 2 * nlen + 1; i++) {
1693 rr[i] = 0;
1696 #ifdef __amd64 /* pipelining optimization for Intel 64, but not AMD64 */
1697 if ((big_cpu == BIG_CPU_INTEL) && (nlen <= BIGTMPSIZE)) {
1699 * Perform the following in two for loops to reduce the
1700 * dependency between computing the carryover bits with
1701 * BIG_MUL_ADD_VEC() and adding them, thus improving pipelining.
1703 for (i = 0; i < nlen; i++) {
1704 rrplusi = rr + i;
1705 digit = *rrplusi * n0;
1706 carry[i] = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit);
1708 for (i = 0; i < nlen; i++) {
1709 j = i + nlen;
1710 rr[j] += carry[i];
1711 while (rr[j] < carry[i]) {
1712 rr[++j] += 1;
1713 carry[i] = 1;
1716 } else
1717 #endif /* __amd64 */
1718 { /* no pipelining optimization */
1719 for (i = 0; i < nlen; i++) {
1720 rrplusi = rr + i;
1721 digit = *rrplusi * n0;
1722 c = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit);
1723 j = i + nlen;
1724 rr[j] += c;
1725 while (rr[j] < c) {
1726 rr[++j] += 1;
1727 c = 1;
1732 needsubtract = 0;
1733 if ((rr[2 * nlen] != 0))
1734 needsubtract = 1;
1735 else {
1736 for (i = 2 * nlen - 1; i >= nlen; i--) {
1737 if (rr[i] > nn[i - nlen]) {
1738 needsubtract = 1;
1739 break;
1740 } else if (rr[i] < nn[i - nlen]) {
1741 break;
1745 if (needsubtract)
1746 big_sub_vec(rr, rr + nlen, nn, nlen);
1747 else {
1748 for (i = 0; i < nlen; i++) {
1749 rr[i] = rr[i + nlen];
1753 /* Remove leading zeros, but keep at least 1 digit: */
1754 for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--)
1756 ret->len = i + 1;
1758 return (BIG_OK);
1762 BIG_CHUNK_TYPE
1763 big_n0(BIG_CHUNK_TYPE n)
1765 int i;
1766 BIG_CHUNK_TYPE result, tmp;
1768 result = 0;
1769 tmp = BIG_CHUNK_ALLBITS;
1770 for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1771 if ((tmp & 1) == 1) {
1772 result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1773 tmp = tmp - n;
1774 } else {
1775 result = (result >> 1);
1777 tmp = tmp >> 1;
1780 return (result);
1785 big_numbits(BIGNUM *n)
1787 int i, j;
1788 BIG_CHUNK_TYPE t;
1790 for (i = n->len - 1; i > 0; i--) {
1791 if (n->value[i] != 0) {
1792 break;
1795 t = n->value[i];
1796 for (j = BIG_CHUNK_SIZE; j > 0; j--) {
1797 if ((t & BIG_CHUNK_HIGHBIT) == 0) {
1798 t = t << 1;
1799 } else {
1800 return (BIG_CHUNK_SIZE * i + j);
1803 return (0);
1807 /* caller must make sure that a < n */
1808 BIG_ERR_CODE
1809 big_mont_rr(BIGNUM *result, BIGNUM *n)
1811 BIGNUM rr;
1812 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1813 int len, i;
1814 BIG_ERR_CODE err;
1816 rr.malloced = 0;
1817 len = n->len;
1819 if ((err = big_init1(&rr, 2 * len + 1,
1820 rrvalue, arraysize(rrvalue))) != BIG_OK) {
1821 return (err);
1824 for (i = 0; i < 2 * len; i++) {
1825 rr.value[i] = 0;
1827 rr.value[2 * len] = 1;
1828 rr.len = 2 * len + 1;
1829 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1830 goto ret;
1832 err = big_copy(result, &rr);
1833 ret:
1834 big_finish(&rr);
1835 return (err);
1839 /* caller must make sure that a < n */
1840 BIG_ERR_CODE
1841 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
1842 BIGNUM *n_rr)
1844 BIGNUM rr;
1845 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1846 int len, i;
1847 BIG_ERR_CODE err;
1849 rr.malloced = 0;
1850 len = n->len;
1852 if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
1853 != BIG_OK) {
1854 return (err);
1857 if (n_rr == NULL) {
1858 for (i = 0; i < 2 * len; i++) {
1859 rr.value[i] = 0;
1861 rr.value[2 * len] = 1;
1862 rr.len = 2 * len + 1;
1863 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1864 goto ret;
1866 n_rr = &rr;
1869 if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
1870 goto ret;
1872 err = big_copy(result, &rr);
1874 ret:
1875 big_finish(&rr);
1876 return (err);
1880 #ifdef USE_FLOATING_POINT
1881 #define big_modexp_ncp_float big_modexp_ncp_sw
1882 #else
1883 #define big_modexp_ncp_int big_modexp_ncp_sw
1884 #endif
1886 #define MAX_EXP_BIT_GROUP_SIZE 6
1887 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1889 /* ARGSUSED */
1890 static BIG_ERR_CODE
1891 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1892 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1895 BIGNUM apowers[APOWERS_MAX_SIZE];
1896 BIGNUM tmp1;
1897 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1898 int i, j, k, l, m, p;
1899 uint32_t bit, bitind, bitcount, groupbits, apowerssize;
1900 uint32_t nbits;
1901 BIG_ERR_CODE err;
1903 nbits = big_numbits(e);
1904 if (nbits < 50) {
1905 groupbits = 1;
1906 apowerssize = 1;
1907 } else {
1908 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1909 apowerssize = 1 << (groupbits - 1);
1913 if ((err = big_init1(&tmp1, 2 * n->len + 1,
1914 tmp1value, arraysize(tmp1value))) != BIG_OK) {
1915 return (err);
1918 /* clear the malloced bit to help cleanup */
1919 for (i = 0; i < apowerssize; i++) {
1920 apowers[i].malloced = 0;
1923 for (i = 0; i < apowerssize; i++) {
1924 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1925 BIG_OK) {
1926 goto ret;
1930 (void) big_copy(&(apowers[0]), ma);
1932 if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
1933 goto ret;
1935 (void) big_copy(ma, &tmp1);
1937 for (i = 1; i < apowerssize; i++) {
1938 if ((err = big_mont_mul(&tmp1, ma,
1939 &(apowers[i - 1]), n, n0)) != BIG_OK) {
1940 goto ret;
1942 (void) big_copy(&apowers[i], &tmp1);
1945 bitind = nbits % BIG_CHUNK_SIZE;
1946 k = 0;
1947 l = 0;
1948 p = 0;
1949 bitcount = 0;
1950 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
1951 for (j = bitind - 1; j >= 0; j--) {
1952 bit = (e->value[i] >> j) & 1;
1953 if ((bitcount == 0) && (bit == 0)) {
1954 if ((err = big_mont_mul(tmp,
1955 tmp, tmp, n, n0)) != BIG_OK) {
1956 goto ret;
1958 } else {
1959 bitcount++;
1960 p = p * 2 + bit;
1961 if (bit == 1) {
1962 k = k + l + 1;
1963 l = 0;
1964 } else {
1965 l++;
1967 if (bitcount == groupbits) {
1968 for (m = 0; m < k; m++) {
1969 if ((err = big_mont_mul(tmp,
1970 tmp, tmp, n, n0)) !=
1971 BIG_OK) {
1972 goto ret;
1975 if ((err = big_mont_mul(tmp, tmp,
1976 &(apowers[p >> (l + 1)]),
1977 n, n0)) != BIG_OK) {
1978 goto ret;
1980 for (m = 0; m < l; m++) {
1981 if ((err = big_mont_mul(tmp,
1982 tmp, tmp, n, n0)) !=
1983 BIG_OK) {
1984 goto ret;
1987 k = 0;
1988 l = 0;
1989 p = 0;
1990 bitcount = 0;
1994 bitind = BIG_CHUNK_SIZE;
1997 for (m = 0; m < k; m++) {
1998 if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
1999 goto ret;
2002 if (p != 0) {
2003 if ((err = big_mont_mul(tmp, tmp,
2004 &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
2005 goto ret;
2008 for (m = 0; m < l; m++) {
2009 if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) {
2010 goto ret;
2014 ret:
2015 for (i = apowerssize - 1; i >= 0; i--) {
2016 big_finish(&(apowers[i]));
2018 big_finish(&tmp1);
2020 return (err);
2024 #ifdef USE_FLOATING_POINT
2026 #ifdef _KERNEL
2028 #include <sys/sysmacros.h>
2029 #include <sys/regset.h>
2030 #include <sys/fpu/fpusystm.h>
2032 /* the alignment for block stores to save fp registers */
2033 #define FPR_ALIGN (64)
2035 extern void big_savefp(kfpu_t *);
2036 extern void big_restorefp(kfpu_t *);
2038 #endif /* _KERNEL */
2041 * This version makes use of floating point for performance
2043 static BIG_ERR_CODE
2044 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
2045 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
2048 int i, j, k, l, m, p;
2049 uint32_t bit, bitind, bitcount, nlen;
2050 double dn0;
2051 double *dn, *dt, *d16r, *d32r;
2052 uint32_t *nint, *prod;
2053 double *apowers[APOWERS_MAX_SIZE];
2054 uint32_t nbits, groupbits, apowerssize;
2055 BIG_ERR_CODE err = BIG_OK;
2057 #ifdef _KERNEL
2058 uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
2059 kfpu_t *fpu;
2061 #ifdef DEBUG
2062 if (!fpu_exists)
2063 return (BIG_GENERAL_ERR);
2064 #endif
2066 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
2067 big_savefp(fpu);
2069 #endif /* _KERNEL */
2071 nbits = big_numbits(e);
2072 if (nbits < 50) {
2073 groupbits = 1;
2074 apowerssize = 1;
2075 } else {
2076 groupbits = MAX_EXP_BIT_GROUP_SIZE;
2077 apowerssize = 1 << (groupbits - 1);
2080 nlen = (BIG_CHUNK_SIZE / 32) * n->len;
2081 dn0 = (double)(n0 & 0xffff);
2083 dn = dt = d16r = d32r = NULL;
2084 nint = prod = NULL;
2085 for (i = 0; i < apowerssize; i++) {
2086 apowers[i] = NULL;
2089 if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
2090 err = BIG_NO_MEM;
2091 goto ret;
2093 if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
2094 err = BIG_NO_MEM;
2095 goto ret;
2097 if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
2098 err = BIG_NO_MEM;
2099 goto ret;
2101 if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
2102 err = BIG_NO_MEM;
2103 goto ret;
2105 if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
2106 err = BIG_NO_MEM;
2107 goto ret;
2109 if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
2110 err = BIG_NO_MEM;
2111 goto ret;
2113 for (i = 0; i < apowerssize; i++) {
2114 if ((apowers[i] = big_malloc((2 * nlen + 1) *
2115 sizeof (double))) == NULL) {
2116 err = BIG_NO_MEM;
2117 goto ret;
2121 #if (BIG_CHUNK_SIZE == 32)
2122 for (i = 0; i < ma->len; i++) {
2123 nint[i] = ma->value[i];
2125 for (; i < nlen; i++) {
2126 nint[i] = 0;
2128 #else
2129 for (i = 0; i < ma->len; i++) {
2130 nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL);
2131 nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32);
2133 for (i = ma->len * 2; i < nlen; i++) {
2134 nint[i] = 0;
2136 #endif
2137 conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);
2139 #if (BIG_CHUNK_SIZE == 32)
2140 for (i = 0; i < n->len; i++) {
2141 nint[i] = n->value[i];
2143 for (; i < nlen; i++) {
2144 nint[i] = 0;
2146 #else
2147 for (i = 0; i < n->len; i++) {
2148 nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL);
2149 nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32);
2151 for (i = n->len * 2; i < nlen; i++) {
2152 nint[i] = 0;
2154 #endif
2155 conv_i32_to_d32(dn, nint, nlen);
2157 mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
2158 conv_i32_to_d32(d32r, prod, nlen);
2159 for (i = 1; i < apowerssize; i++) {
2160 mont_mulf_noconv(prod, d32r, apowers[i - 1],
2161 dt, dn, nint, nlen, dn0);
2162 conv_i32_to_d16(apowers[i], prod, nlen);
2165 #if (BIG_CHUNK_SIZE == 32)
2166 for (i = 0; i < tmp->len; i++) {
2167 prod[i] = tmp->value[i];
2169 for (; i < nlen + 1; i++) {
2170 prod[i] = 0;
2172 #else
2173 for (i = 0; i < tmp->len; i++) {
2174 prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL);
2175 prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32);
2177 for (i = tmp->len * 2; i < nlen + 1; i++) {
2178 prod[i] = 0;
2180 #endif
2182 bitind = nbits % BIG_CHUNK_SIZE;
2183 k = 0;
2184 l = 0;
2185 p = 0;
2186 bitcount = 0;
2187 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
2188 for (j = bitind - 1; j >= 0; j--) {
2189 bit = (e->value[i] >> j) & 1;
2190 if ((bitcount == 0) && (bit == 0)) {
2191 conv_i32_to_d32_and_d16(d32r, d16r,
2192 prod, nlen);
2193 mont_mulf_noconv(prod, d32r, d16r,
2194 dt, dn, nint, nlen, dn0);
2195 } else {
2196 bitcount++;
2197 p = p * 2 + bit;
2198 if (bit == 1) {
2199 k = k + l + 1;
2200 l = 0;
2201 } else {
2202 l++;
2204 if (bitcount == groupbits) {
2205 for (m = 0; m < k; m++) {
2206 conv_i32_to_d32_and_d16(d32r,
2207 d16r, prod, nlen);
2208 mont_mulf_noconv(prod, d32r,
2209 d16r, dt, dn, nint,
2210 nlen, dn0);
2212 conv_i32_to_d32(d32r, prod, nlen);
2213 mont_mulf_noconv(prod, d32r,
2214 apowers[p >> (l + 1)],
2215 dt, dn, nint, nlen, dn0);
2216 for (m = 0; m < l; m++) {
2217 conv_i32_to_d32_and_d16(d32r,
2218 d16r, prod, nlen);
2219 mont_mulf_noconv(prod, d32r,
2220 d16r, dt, dn, nint,
2221 nlen, dn0);
2223 k = 0;
2224 l = 0;
2225 p = 0;
2226 bitcount = 0;
2230 bitind = BIG_CHUNK_SIZE;
2233 for (m = 0; m < k; m++) {
2234 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2235 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2237 if (p != 0) {
2238 conv_i32_to_d32(d32r, prod, nlen);
2239 mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
2240 dt, dn, nint, nlen, dn0);
2242 for (m = 0; m < l; m++) {
2243 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2244 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2247 #if (BIG_CHUNK_SIZE == 32)
2248 for (i = 0; i < nlen; i++) {
2249 result->value[i] = prod[i];
2251 for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--)
2253 #else
2254 for (i = 0; i < nlen / 2; i++) {
2255 result->value[i] = (uint64_t)(prod[2 * i]) +
2256 (((uint64_t)(prod[2 * i + 1])) << 32);
2258 for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--)
2260 #endif
2261 result->len = i + 1;
2263 ret:
2264 for (i = apowerssize - 1; i >= 0; i--) {
2265 if (apowers[i] != NULL)
2266 big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
2268 if (d32r != NULL) {
2269 big_free(d32r, nlen * sizeof (double));
2271 if (d16r != NULL) {
2272 big_free(d16r, (2 * nlen + 1) * sizeof (double));
2274 if (prod != NULL) {
2275 big_free(prod, (nlen + 1) * sizeof (uint32_t));
2277 if (nint != NULL) {
2278 big_free(nint, nlen * sizeof (uint32_t));
2280 if (dt != NULL) {
2281 big_free(dt, (4 * nlen + 2) * sizeof (double));
2283 if (dn != NULL) {
2284 big_free(dn, nlen * sizeof (double));
2287 #ifdef _KERNEL
2288 big_restorefp(fpu);
2289 #endif
2291 return (err);
2294 #endif /* USE_FLOATING_POINT */
2297 BIG_ERR_CODE
2298 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr,
2299 big_modexp_ncp_info_t *info)
2301 BIGNUM ma, tmp, rr;
2302 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE];
2303 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2304 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
2305 BIG_ERR_CODE err;
2306 BIG_CHUNK_TYPE n0;
2308 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) !=
2309 BIG_OK) {
2310 return (err);
2312 ma.len = 1;
2313 ma.value[0] = 0;
2315 if ((err = big_init1(&tmp, 2 * n->len + 1,
2316 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2317 goto ret1;
2320 /* clear the malloced bit to help cleanup */
2321 rr.malloced = 0;
2323 if (n_rr == NULL) {
2324 if ((err = big_init1(&rr, 2 * n->len + 1,
2325 rrvalue, arraysize(rrvalue))) != BIG_OK) {
2326 goto ret2;
2328 if (big_mont_rr(&rr, n) != BIG_OK) {
2329 goto ret;
2331 n_rr = &rr;
2334 n0 = big_n0(n->value[0]);
2336 if (big_cmp_abs(a, n) > 0) {
2337 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2338 goto ret;
2340 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2341 } else {
2342 err = big_mont_conv(&ma, a, n, n0, n_rr);
2344 if (err != BIG_OK) {
2345 goto ret;
2348 tmp.len = 1;
2349 tmp.value[0] = 1;
2350 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) {
2351 goto ret;
2354 if ((info != NULL) && (info->func != NULL)) {
2355 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0,
2356 info->ncp, info->reqp);
2357 } else {
2358 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0);
2360 if (err != BIG_OK) {
2361 goto ret;
2364 ma.value[0] = 1;
2365 ma.len = 1;
2366 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) {
2367 goto ret;
2369 err = big_copy(result, &tmp);
2371 ret:
2372 if (rr.malloced) {
2373 big_finish(&rr);
2375 ret2:
2376 big_finish(&tmp);
2377 ret1:
2378 big_finish(&ma);
2380 return (err);
2383 BIG_ERR_CODE
2384 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
2386 return (big_modexp_ext(result, a, e, n, n_rr, NULL));
2390 BIG_ERR_CODE
2391 big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2392 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2393 BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info)
2395 BIGNUM ap, aq, tmp;
2396 int alen, biglen, sign;
2397 BIG_ERR_CODE err;
2399 if (p->len > q->len) {
2400 biglen = p->len;
2401 } else {
2402 biglen = q->len;
2405 if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) {
2406 return (err);
2408 if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) {
2409 goto ret1;
2411 if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) {
2412 goto ret2;
2416 * check whether a is too short - to avoid timing attacks
2418 alen = a->len;
2419 while ((alen > p->len) && (a->value[alen - 1] == 0)) {
2420 alen--;
2422 if (alen < p->len + q->len) {
2424 * a is too short, add p*q to it before
2425 * taking it modulo p and q
2426 * this will also affect timing, but this difference
2427 * does not depend on p or q, only on a
2428 * (in "normal" operation, this path will never be
2429 * taken, so it is not a performance penalty
2431 if ((err = big_mul(&tmp, p, q)) != BIG_OK) {
2432 goto ret;
2434 if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) {
2435 goto ret;
2437 if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) {
2438 goto ret;
2440 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2441 goto ret;
2443 } else {
2444 if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) {
2445 goto ret;
2447 if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) {
2448 goto ret;
2452 if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) !=
2453 BIG_OK) {
2454 goto ret;
2456 if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) !=
2457 BIG_OK) {
2458 goto ret;
2460 if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) {
2461 goto ret;
2463 if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) {
2464 goto ret;
2466 sign = tmp.sign;
2467 tmp.sign = 1;
2468 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2469 goto ret;
2471 if ((sign == -1) && (!big_is_zero(&aq))) {
2472 (void) big_sub_pos(&aq, q, &aq);
2474 if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) {
2475 goto ret;
2477 err = big_add_abs(result, &ap, &tmp);
2479 ret:
2480 big_finish(&tmp);
2481 ret2:
2482 big_finish(&aq);
2483 ret1:
2484 big_finish(&ap);
2486 return (err);
2490 BIG_ERR_CODE
2491 big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2492 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2493 BIGNUM *p_rr, BIGNUM *q_rr)
2495 return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1,
2496 p, q, pinvmodq, p_rr, q_rr, NULL));
2500 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
2501 #if !defined(NO_BIG_ONE)
2502 BIGNUM big_One = {1, 1, 1, 0, onearr};
2503 #endif
2505 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
2506 #if !defined(NO_BIG_TWO)
2507 BIGNUM big_Two = {1, 1, 1, 0, twoarr};
2508 #endif
2510 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
2511 static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2514 BIG_ERR_CODE
2515 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2517 BIGNUM *high, *low, *mid, *t;
2518 BIGNUM t1, t2, t3, prod;
2519 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2520 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2521 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2522 BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE];
2523 int i, diff;
2524 uint32_t nbits, nrootbits, highbits;
2525 BIG_ERR_CODE err;
2527 nbits = big_numbits(n);
2529 if ((err = big_init1(&t1, n->len + 1,
2530 t1value, arraysize(t1value))) != BIG_OK)
2531 return (err);
2532 if ((err = big_init1(&t2, n->len + 1,
2533 t2value, arraysize(t2value))) != BIG_OK)
2534 goto ret1;
2535 if ((err = big_init1(&t3, n->len + 1,
2536 t3value, arraysize(t3value))) != BIG_OK)
2537 goto ret2;
2538 if ((err = big_init1(&prod, n->len + 1,
2539 prodvalue, arraysize(prodvalue))) != BIG_OK)
2540 goto ret3;
2542 nrootbits = (nbits + 1) / 2;
2543 t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2544 for (i = 0; i < t1.len; i++) {
2545 t1.value[i] = 0;
2546 t2.value[i] = BIG_CHUNK_ALLBITS;
2548 highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1);
2549 if (highbits == BIG_CHUNK_SIZE) {
2550 t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT;
2551 t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS;
2552 } else {
2553 t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1);
2554 t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
2557 high = &t2;
2558 low = &t1;
2559 mid = &t3;
2561 if ((err = big_mul(&prod, high, high)) != BIG_OK) {
2562 goto ret;
2564 diff = big_cmp_abs(&prod, n);
2565 if (diff <= 0) {
2566 err = big_copy(result, high);
2567 goto ret;
2570 (void) big_sub_pos(mid, high, low);
2571 while (big_cmp_abs(&big_One, mid) != 0) {
2572 (void) big_add_abs(mid, high, low);
2573 (void) big_half_pos(mid, mid);
2574 if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
2575 goto ret;
2576 diff = big_cmp_abs(&prod, n);
2577 if (diff > 0) {
2578 t = high;
2579 high = mid;
2580 mid = t;
2581 } else if (diff < 0) {
2582 t = low;
2583 low = mid;
2584 mid = t;
2585 } else {
2586 err = big_copy(result, low);
2587 goto ret;
2589 (void) big_sub_pos(mid, high, low);
2592 err = big_copy(result, low);
2593 ret:
2594 if (prod.malloced) big_finish(&prod);
2595 ret3:
2596 if (t3.malloced) big_finish(&t3);
2597 ret2:
2598 if (t2.malloced) big_finish(&t2);
2599 ret1:
2600 if (t1.malloced) big_finish(&t1);
2602 return (err);
2606 BIG_ERR_CODE
2607 big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
2609 BIGNUM *t, *tmp2, *m, *n;
2610 BIGNUM t1, t2, t3;
2611 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2612 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2613 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2614 int len, err;
2616 if (big_is_zero(nn) ||
2617 (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
2618 *jac = 0;
2619 return (BIG_OK);
2622 if (nn->len > mm->len) {
2623 len = nn->len;
2624 } else {
2625 len = mm->len;
2628 if ((err = big_init1(&t1, len,
2629 t1value, arraysize(t1value))) != BIG_OK) {
2630 return (err);
2632 if ((err = big_init1(&t2, len,
2633 t2value, arraysize(t2value))) != BIG_OK) {
2634 goto ret1;
2636 if ((err = big_init1(&t3, len,
2637 t3value, arraysize(t3value))) != BIG_OK) {
2638 goto ret2;
2641 n = &t1;
2642 m = &t2;
2643 tmp2 = &t3;
2645 (void) big_copy(n, nn);
2646 (void) big_copy(m, mm);
2648 *jac = 1;
2649 while (big_cmp_abs(&big_One, m) != 0) {
2650 if (big_is_zero(n)) {
2651 *jac = 0;
2652 goto ret;
2654 if ((m->value[0] & 1) == 0) {
2655 if (((n->value[0] & 7) == 3) ||
2656 ((n->value[0] & 7) == 5))
2657 *jac = -*jac;
2658 (void) big_half_pos(m, m);
2659 } else if ((n->value[0] & 1) == 0) {
2660 if (((m->value[0] & 7) == 3) ||
2661 ((m->value[0] & 7) == 5))
2662 *jac = -*jac;
2663 (void) big_half_pos(n, n);
2664 } else {
2665 if (((m->value[0] & 3) == 3) &&
2666 ((n->value[0] & 3) == 3)) {
2667 *jac = -*jac;
2669 if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
2670 goto ret;
2672 t = tmp2;
2673 tmp2 = m;
2674 m = n;
2675 n = t;
2678 err = BIG_OK;
2680 ret:
2681 if (t3.malloced) big_finish(&t3);
2682 ret2:
2683 if (t2.malloced) big_finish(&t2);
2684 ret1:
2685 if (t1.malloced) big_finish(&t1);
2687 return (err);
2691 BIG_ERR_CODE
2692 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2694 int i;
2695 uint32_t m, w;
2696 BIG_CHUNK_TYPE bit;
2697 BIGNUM ki, tmp, tmp2;
2698 BIG_CHUNK_TYPE kivalue[BIGTMPSIZE];
2699 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2700 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
2701 BIG_ERR_CODE err;
2703 if (big_cmp_abs(k, &big_One) == 0) {
2704 (void) big_copy(Lk, p);
2705 (void) big_copy(Lkminus1, &big_Two);
2706 return (BIG_OK);
2709 if ((err = big_init1(&ki, k->len + 1,
2710 kivalue, arraysize(kivalue))) != BIG_OK)
2711 return (err);
2713 if ((err = big_init1(&tmp, 2 * n->len + 1,
2714 tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2715 goto ret1;
2717 if ((err = big_init1(&tmp2, n->len,
2718 tmp2value, arraysize(tmp2value))) != BIG_OK)
2719 goto ret2;
2721 m = big_numbits(k);
2722 ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
2723 w = (m - 1) / BIG_CHUNK_SIZE;
2724 bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
2725 for (i = 0; i < ki.len; i++) {
2726 ki.value[i] = 0;
2728 ki.value[ki.len - 1] = bit;
2729 if (big_cmp_abs(k, &ki) != 0) {
2730 (void) big_double(&ki, &ki);
2732 (void) big_sub_pos(&ki, &ki, k);
2734 (void) big_copy(Lk, p);
2735 (void) big_copy(Lkminus1, &big_Two);
2737 for (i = 0; i < m; i++) {
2738 if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
2739 goto ret;
2741 (void) big_add_abs(&tmp, &tmp, n);
2742 (void) big_sub_pos(&tmp, &tmp, p);
2743 if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
2744 goto ret;
2746 if ((ki.value[w] & bit) != 0) {
2747 if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
2748 BIG_OK) {
2749 goto ret;
2751 (void) big_add_abs(&tmp, &tmp, n);
2752 (void) big_sub_pos(&tmp, &tmp, &big_Two);
2753 if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
2754 BIG_OK) {
2755 goto ret;
2757 (void) big_copy(Lk, &tmp2);
2758 } else {
2759 if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
2760 goto ret;
2762 (void) big_add_abs(&tmp, &tmp, n);
2763 (void) big_sub_pos(&tmp, &tmp, &big_Two);
2764 if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
2765 goto ret;
2767 (void) big_copy(Lkminus1, &tmp2);
2769 bit = bit >> 1;
2770 if (bit == 0) {
2771 bit = BIG_CHUNK_HIGHBIT;
2772 w--;
2776 err = BIG_OK;
2778 ret:
2779 if (tmp2.malloced) big_finish(&tmp2);
2780 ret2:
2781 if (tmp.malloced) big_finish(&tmp);
2782 ret1:
2783 if (ki.malloced) big_finish(&ki);
2785 return (err);
2789 BIG_ERR_CODE
2790 big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
2792 BIGNUM o, nminus1, tmp, Lkminus1, Lk;
2793 BIG_CHUNK_TYPE ovalue[BIGTMPSIZE];
2794 BIG_CHUNK_TYPE nminus1value[BIGTMPSIZE];
2795 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2796 BIG_CHUNK_TYPE Lkminus1value[BIGTMPSIZE];
2797 BIG_CHUNK_TYPE Lkvalue[BIGTMPSIZE];
2798 BIG_ERR_CODE err;
2799 int e, i, jac;
2801 if (big_cmp_abs(n, &big_One) == 0) {
2802 return (BIG_FALSE);
2804 if (big_cmp_abs(n, &big_Two) == 0) {
2805 return (BIG_TRUE);
2807 if ((n->value[0] & 1) == 0) {
2808 return (BIG_FALSE);
2811 if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
2812 BIG_OK) {
2813 return (err);
2816 if ((err = big_init1(&nminus1, n->len,
2817 nminus1value, arraysize(nminus1value))) != BIG_OK) {
2818 goto ret1;
2821 if ((err = big_init1(&tmp, 2 * n->len,
2822 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2823 goto ret2;
2826 if ((err = big_init1(&Lkminus1, n->len,
2827 Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
2828 goto ret3;
2831 if ((err = big_init1(&Lk, n->len,
2832 Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
2833 goto ret4;
2836 (void) big_sub_pos(&o, n, &big_One); /* cannot fail */
2837 (void) big_copy(&nminus1, &o); /* cannot fail */
2838 e = 0;
2839 while ((o.value[0] & 1) == 0) {
2840 e++;
2841 (void) big_half_pos(&o, &o); /* cannot fail */
2843 if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
2844 BIG_OK) {
2845 goto ret;
2848 i = 0;
2849 while ((i < e) &&
2850 (big_cmp_abs(&tmp, &big_One) != 0) &&
2851 (big_cmp_abs(&tmp, &nminus1) != 0)) {
2852 if ((err =
2853 big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
2854 BIG_OK)
2855 goto ret;
2856 i++;
2859 if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
2860 ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) {
2861 err = BIG_FALSE;
2862 goto ret;
2865 if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) {
2866 goto ret;
2869 if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) {
2870 goto ret;
2872 if (big_cmp_abs(&tmp, n) == 0) {
2873 err = BIG_FALSE;
2874 goto ret;
2877 (void) big_copy(&o, &big_Two);
2878 do {
2879 (void) big_add_abs(&o, &o, &big_One);
2880 if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) {
2881 goto ret;
2883 (void) big_sub_pos(&tmp, &tmp, &big_Four);
2884 if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) {
2885 goto ret;
2887 } while (jac != -1);
2889 (void) big_add_abs(&tmp, n, &big_One);
2890 if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) {
2891 goto ret;
2894 if ((big_cmp_abs(&Lkminus1, &o) == 0) &&
2895 (big_cmp_abs(&Lk, &big_Two) == 0)) {
2896 err = BIG_TRUE;
2897 } else {
2898 err = BIG_FALSE;
2901 ret:
2902 if (Lk.malloced) big_finish(&Lk);
2903 ret4:
2904 if (Lkminus1.malloced) big_finish(&Lkminus1);
2905 ret3:
2906 if (tmp.malloced) big_finish(&tmp);
2907 ret2:
2908 if (nminus1.malloced) big_finish(&nminus1);
2909 ret1:
2910 if (o.malloced) big_finish(&o);
2912 return (err);
2916 BIG_ERR_CODE
2917 big_isprime_pos(BIGNUM *n)
2919 return (big_isprime_pos_ext(n, NULL));
2923 #define SIEVESIZE 1000
2926 BIG_ERR_CODE
2927 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2929 static const uint32_t smallprimes[] = {
2930 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2931 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 };
2932 BIG_ERR_CODE err;
2933 int sieve[SIEVESIZE];
2934 int i;
2935 uint32_t off, p;
2937 if ((err = big_copy(result, n)) != BIG_OK) {
2938 return (err);
2940 result->value[0] |= 1;
2941 /* CONSTCOND */
2942 while (1) {
2943 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2944 for (i = 0;
2945 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2946 p = smallprimes[i];
2947 off = big_modhalf_pos(result, p);
2948 off = p - off;
2949 if ((off % 2) == 1) {
2950 off = off + p;
2952 off = off / 2;
2953 while (off < SIEVESIZE) {
2954 sieve[off] = 1;
2955 off = off + p;
2959 for (i = 0; i < SIEVESIZE; i++) {
2960 if (sieve[i] == 0) {
2961 err = big_isprime_pos_ext(result, info);
2962 if (err != BIG_FALSE) {
2963 if (err != BIG_TRUE) {
2964 return (err);
2965 } else {
2966 goto out;
2971 if ((err = big_add_abs(result, result, &big_Two)) !=
2972 BIG_OK) {
2973 return (err);
2978 out:
2979 return (BIG_OK);
2983 BIG_ERR_CODE
2984 big_nextprime_pos(BIGNUM *result, BIGNUM *n)
2986 return (big_nextprime_pos_ext(result, n, NULL));
2990 BIG_ERR_CODE
2991 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
2993 BIG_ERR_CODE err;
2996 if ((err = big_copy(result, n)) != BIG_OK)
2997 return (err);
2998 result->value[0] |= 1;
2999 while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) {
3000 if (err != BIG_FALSE)
3001 return (err);
3002 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK)
3003 return (err);
3005 return (BIG_OK);
3010 * given m and e, computes the rest in the equation
3011 * gcd(m, e) = cm * m + ce * e
3013 BIG_ERR_CODE
3014 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
3016 BIGNUM *xi, *ri, *riminus1, *riminus2, *t;
3017 BIGNUM *vmi, *vei, *vmiminus1, *veiminus1;
3018 BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp;
3019 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
3020 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
3021 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
3022 BIG_CHUNK_TYPE t4value[BIGTMPSIZE];
3023 BIG_CHUNK_TYPE t5value[BIGTMPSIZE];
3024 BIG_CHUNK_TYPE t6value[BIGTMPSIZE];
3025 BIG_CHUNK_TYPE t7value[BIGTMPSIZE];
3026 BIG_CHUNK_TYPE t8value[BIGTMPSIZE];
3027 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
3028 BIG_ERR_CODE err;
3029 int len;
3031 if (big_cmp_abs(m, e) >= 0) {
3032 len = m->len;
3033 } else {
3034 len = e->len;
3037 if ((err = big_init1(&t1, len,
3038 t1value, arraysize(t1value))) != BIG_OK) {
3039 return (err);
3041 if ((err = big_init1(&t2, len,
3042 t2value, arraysize(t2value))) != BIG_OK) {
3043 goto ret1;
3045 if ((err = big_init1(&t3, len,
3046 t3value, arraysize(t3value))) != BIG_OK) {
3047 goto ret2;
3049 if ((err = big_init1(&t4, len,
3050 t4value, arraysize(t3value))) != BIG_OK) {
3051 goto ret3;
3053 if ((err = big_init1(&t5, len,
3054 t5value, arraysize(t5value))) != BIG_OK) {
3055 goto ret4;
3057 if ((err = big_init1(&t6, len,
3058 t6value, arraysize(t6value))) != BIG_OK) {
3059 goto ret5;
3061 if ((err = big_init1(&t7, len,
3062 t7value, arraysize(t7value))) != BIG_OK) {
3063 goto ret6;
3065 if ((err = big_init1(&t8, len,
3066 t8value, arraysize(t8value))) != BIG_OK) {
3067 goto ret7;
3070 if ((err = big_init1(&tmp, 2 * len,
3071 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
3072 goto ret8;
3075 ri = &t1;
3076 ri->value[0] = 1;
3077 ri->len = 1;
3078 xi = &t2;
3079 riminus1 = &t3;
3080 riminus2 = &t4;
3081 vmi = &t5;
3082 vei = &t6;
3083 vmiminus1 = &t7;
3084 veiminus1 = &t8;
3086 (void) big_copy(vmiminus1, &big_One);
3087 (void) big_copy(vmi, &big_One);
3088 (void) big_copy(veiminus1, &big_One);
3089 (void) big_copy(xi, &big_One);
3090 vei->len = 1;
3091 vei->value[0] = 0;
3093 (void) big_copy(riminus1, m);
3094 (void) big_copy(ri, e);
3096 while (!big_is_zero(ri)) {
3097 t = riminus2;
3098 riminus2 = riminus1;
3099 riminus1 = ri;
3100 ri = t;
3101 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) {
3102 goto ret;
3104 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) {
3105 goto ret;
3107 t = vmiminus1;
3108 vmiminus1 = vmi;
3109 vmi = t;
3110 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
3111 goto ret;
3113 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
3114 goto ret;
3116 t = veiminus1;
3117 veiminus1 = vei;
3118 vei = t;
3119 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
3120 BIG_OK) {
3121 goto ret;
3124 if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
3125 goto ret;
3127 if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
3128 goto ret;
3130 if (ce != NULL) {
3131 err = big_copy(ce, vei);
3133 ret:
3134 if (tmp.malloced) big_finish(&tmp);
3135 ret8:
3136 if (t8.malloced) big_finish(&t8);
3137 ret7:
3138 if (t7.malloced) big_finish(&t7);
3139 ret6:
3140 if (t6.malloced) big_finish(&t6);
3141 ret5:
3142 if (t5.malloced) big_finish(&t5);
3143 ret4:
3144 if (t4.malloced) big_finish(&t4);
3145 ret3:
3146 if (t3.malloced) big_finish(&t3);
3147 ret2:
3148 if (t2.malloced) big_finish(&t2);
3149 ret1:
3150 if (t1.malloced) big_finish(&t1);
3152 return (err);
3156 * Get a rlen-bit random number in BIGNUM format. Caller-supplied
3157 * (*rfunc)(void *dbuf, size_t dlen) must return 0 for success and
3158 * -1 for failure. Note: (*rfunc)() takes length in bytes, not bits.
3160 BIG_ERR_CODE
3161 big_random(BIGNUM *r, size_t rlen, int (*rfunc)(void *, size_t))
3163 size_t rwords, rbytes;
3164 int shift;
3166 if (r == NULL || rlen == 0 || rfunc == NULL)
3167 return (BIG_INVALID_ARGS);
3170 * Convert rlen bits to r->len words (32- or 64-bit), rbytes bytes
3171 * and extend r if it's not big enough to hold the random number.
3173 rwords = BITLEN2BIGNUMLEN(rlen);
3174 rbytes = rwords * sizeof (BIG_CHUNK_TYPE);
3175 if (big_extend(r, rwords) != BIG_OK)
3176 return (BIG_NO_MEM);
3177 #ifdef BIGNUM_CHUNK_32
3178 r->len = rwords;
3179 #else
3180 r->len = (uint32_t)rwords;
3181 #endif
3183 if ((*rfunc)(r->value, rbytes) < 0)
3184 return (BIG_NO_RANDOM);
3186 r->value[rwords - 1] |= BIG_CHUNK_HIGHBIT;
3189 * If the bit length is not a word boundary, shift the most
3190 * significant word so that we have an exactly rlen-long number.
3192 if ((shift = rlen % BIG_CHUNK_SIZE) != 0)
3193 r->value[rwords - 1] >>= (BIG_CHUNK_SIZE - shift);
3195 r->sign = 1; /* non-negative */
3197 return (BIG_OK);