/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * There are 4 preprocessor symbols used to configure the bignum
 * implementation.  This file contains no logic to configure based on
 * processor; we leave that to the Makefiles to specify.
 *
 * USE_FLOATING_POINT
 * Meaning: There is support for a fast floating-point implementation of
 * Montgomery multiply.
 *
 * PSR_MUL
 * Meaning: There are processor-specific versions of the low level
 * functions to implement big_mul.  Those functions are: big_mul_set_vec,
 * big_mul_add_vec, big_mul_vec, and big_sqr_vec.  PSR_MUL implies support
 * for all 4 functions.  You cannot pick and choose which subset of these
 * functions to support; that would lead to a rat's nest of #ifdefs.
 *
 * HWCAP
 * Meaning: Call multiply support functions through a function pointer.
 * On x86, there are multiple implementations for different hardware
 * capabilities, such as MMX, SSE2, etc.  Tests are made at run-time, when
 * a function is first used.  So, the support functions are called through
 * a function pointer.  There is no need for that on Sparc, because there
 * is only one implementation; support functions are called directly.
 * Later, if there were some new VIS instruction, or something, and a
 * run-time test were needed, rather than variant kernel modules and
 * libraries, then HWCAP would be defined for Sparc, as well.
 *
 * UMUL64
 * Meaning: It is safe to use generic C code that assumes the existence
 * of a 32 x 32 --> 64 bit unsigned multiply.  If this is not defined,
 * then the generic code for big_mul_add_vec() must necessarily be very slow,
 * because it must fall back to using 16 x 16 --> 32 bit multiplication.
 */
#include <sys/types.h>
#include <sys/mdesc.h>
#include <sys/crypto/common.h>

#include <sys/param.h>
#include <sys/sunddi.h>

#include <sys/x86_archext.h>	/* cpuid_getvendor() */
#include <sys/cpuvar.h>

#include <sys/auxv.h>		/* getisax() */
#ifdef _LP64 /* truncate 64-bit size_t to 32-bits */
#define	UI32(ui)	((uint32_t)ui)
#else /* size_t already 32-bits */
#define	UI32(ui)	(ui)
#endif

#ifdef _KERNEL

#define	big_malloc(size)	kmem_alloc(size, KM_NOSLEEP)
#define	big_free(ptr, size)	kmem_free(ptr, size)
/*
 * Allocate memory of newsize bytes and copy oldsize bytes
 * to the newly-allocated memory, then free the
 * previously-allocated memory.
 * Note: newsize must be > oldsize
 */
void *
big_realloc(void *from, size_t oldsize, size_t newsize)
{
	void	*rv;

	rv = kmem_alloc(newsize, KM_NOSLEEP);
	if (rv != NULL)
		bcopy(from, rv, oldsize);
	kmem_free(from, oldsize);

	return (rv);
}
#else	/* !_KERNEL */

#ifndef MALLOC_DEBUG

#define	big_malloc(size)	malloc(size)
#define	big_free(ptr, size)	free(ptr)

#else

void
big_free(void *ptr, size_t size)
{
	printf("freed %zu bytes at %p\n", size, ptr);
	free(ptr);
}

void *
big_malloc(size_t size)
{
	void	*rv;

	rv = malloc(size);
	printf("malloced %zu bytes, addr:%p\n", size, rv);
	return (rv);
}

#endif /* MALLOC_DEBUG */

#define	big_realloc(x, y, z)	realloc((x), (z))

#endif	/* _KERNEL */
/*
 * Print a BIGNUM type to stdout.
 */
void
printbignum(char *aname, BIGNUM *a)
{
	int	i;

	(void) printf("\n%s\n%d\n", aname, a->sign * a->len);
	for (i = a->len - 1; i >= 0; i--) {
#ifdef BIGNUM_CHUNK_32
		(void) printf("%08x ", a->value[i]);
		if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) {
			(void) printf("\n");
		}
#else
		(void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
		    (uint32_t)((a->value[i]) & 0xffffffff));
		if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */
			(void) printf("\n");
		}
#endif
	}
	(void) printf("\n");
}
/*
 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
 * Cache the result, as the CPU can't change.
 *
 * Note: the userland version uses getisax() and checks for an AMD-64-only
 * feature.  The kernel version uses cpuid_getvendor().
 */
static int
bignum_on_intel(void)
{
	static int	cached_result = -1;

	if (cached_result == -1) { /* first time */
#ifdef _KERNEL
		cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
#else
		uint_t	ui;

		(void) getisax(&ui, 1);
		cached_result = ((ui & AV_386_AMD_MMX) == 0);
#endif	/* _KERNEL */
	}
	return (cached_result);
}
/*
 * Initialize and allocate memory for a BIGNUM type.
 *
 * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
 *
 * Note: call big_finish() to free memory allocated by big_init().
 *
 * Input:
 * number	Uninitialized memory for BIGNUM
 * size		Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 *
 * Output:
 * number	Initialized BIGNUM
 *
 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 */
BIG_ERR_CODE
big_init(BIGNUM *number, int size)
{
	number->value = big_malloc(BIGNUM_WORDSIZE * size);
	if (number->value == NULL) {
		return (BIG_NO_MEM);
	}
	number->size = size;
	number->len = 0;
	number->sign = 1;
	number->malloced = 1;
	return (BIG_OK);
}
/*
 * Initialize and, if needed, allocate memory for a BIGNUM type.
 * Use the buffer passed, buf, if any, instead of allocating memory
 * if it's at least "size" bytes.
 *
 * Note: call big_finish() to free memory allocated by big_init().
 *
 * Input:
 * number	Uninitialized memory for BIGNUM
 * size		Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 * buf		Buffer for storing a BIGNUM.
 *		If NULL, big_init1() will allocate a buffer
 * bufsize	Size, in BIG_CHUNK_SIZE-bit words, of buf
 *
 * Output:
 * number	Initialized BIGNUM
 *
 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 */
BIG_ERR_CODE
big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
{
	if ((buf == NULL) || (size > bufsize)) {
		number->value = big_malloc(BIGNUM_WORDSIZE * size);
		if (number->value == NULL) {
			return (BIG_NO_MEM);
		}
		number->size = size;
		number->malloced = 1;
	} else {
		number->value = buf;
		number->size = bufsize;
		number->malloced = 0;
	}
	number->len = 0;
	number->sign = 1;
	return (BIG_OK);
}
/*
 * Free memory, if any, allocated by big_init() or big_init1().
 */
void
big_finish(BIGNUM *number)
{
	if (number->malloced == 1) {
		big_free(number->value, BIGNUM_WORDSIZE * number->size);
		number->malloced = 0;
	}
}
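/*
 * Usage sketch (illustrative only, not part of the original file): a
 * typical caller hands big_init1() a stack buffer so small operands
 * avoid heap allocation, and pairs every init with big_finish().  The
 * BIGNUM_USAGE_EXAMPLE guard is hypothetical; nothing defines it.
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static BIG_ERR_CODE
example_init_finish(void)
{
	BIGNUM		tmp;
	BIG_CHUNK_TYPE	tmpvalue[BIGTMPSIZE];
	BIG_ERR_CODE	err;

	if ((err = big_init1(&tmp, BIGTMPSIZE,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
		return (err);	/* can only fail if it had to allocate */
	}
	/* ... use tmp ... */
	big_finish(&tmp);	/* frees only what big_init1() allocated */
	return (BIG_OK);
}
#endif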
/*
 * bn->size should be at least
 * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE bytes
 * converts from byte-big-endian format to bignum format (words in
 * little endian order, but bytes within the words big endian)
 */
void
bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
{
	int		i, j;
	uint32_t	offs;
	const uint32_t	slen = UI32(len);
	BIG_CHUNK_TYPE	word;
	uchar_t		*knwordp;

	if (slen == 0) {
		bn->len = 1;
		bn->value[0] = 0;
		return;
	}

	offs = slen % BIGNUM_WORDSIZE;
	bn->len = slen / BIGNUM_WORDSIZE;

	for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
		knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);
		word = knwordp[0];
		for (j = 1; j < BIGNUM_WORDSIZE; j++) {
			word = (word << BITSINBYTE) + knwordp[j];
		}
		bn->value[i] = word;
	}
	if (offs > 0) {
		word = kn[0];
		for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
		bn->value[bn->len++] = word;
	}
	while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
		bn->len--;
	}
}
/*
 * copies the least significant len bytes if
 * len < bn->len * BIGNUM_WORDSIZE
 * converts from bignum format to byte-big-endian format.
 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
 */
void
bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
{
	int		i, j;
	uint32_t	offs;
	const uint32_t	slen = UI32(len);
	BIG_CHUNK_TYPE	word;

	if (len < BIGNUM_WORDSIZE * bn->len) {
		for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
			word = bn->value[i];
			for (j = 0; j < BIGNUM_WORDSIZE; j++) {
				kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
				    word & 0xff;
				word = word >> BITSINBYTE;
			}
		}
		offs = slen % BIGNUM_WORDSIZE;
		if (offs > 0) {
			word = bn->value[slen / BIGNUM_WORDSIZE];
			for (i = slen % BIGNUM_WORDSIZE; i > 0; i--) {
				kn[i - 1] = word & 0xff;
				word = word >> BITSINBYTE;
			}
		}
	} else {
		for (i = 0; i < bn->len; i++) {
			word = bn->value[i];
			for (j = 0; j < BIGNUM_WORDSIZE; j++) {
				kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
				    word & 0xff;
				word = word >> BITSINBYTE;
			}
		}
		for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) {
			kn[i] = 0;
		}
	}
}
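/*
 * Illustrative round trip through the conversion routines above (the
 * BIGNUM_USAGE_EXAMPLE guard is hypothetical): import a 16-byte
 * big-endian string, then export it again.  A len-byte input needs at
 * most len / BIGNUM_WORDSIZE + 1 words.
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static BIG_ERR_CODE
example_roundtrip(uchar_t *in16)
{
	BIGNUM		bn;
	uchar_t		out[16];
	BIG_ERR_CODE	err;

	if ((err = big_init(&bn, 16 / BIGNUM_WORDSIZE + 1)) != BIG_OK) {
		return (err);
	}
	bytestring2bignum(&bn, in16, 16);	/* big endian bytes in */
	bignum2bytestring(out, &bn, 16);	/* big endian bytes out */
	big_finish(&bn);
	return (BIG_OK);
}
#endif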
/*
 * Returns the number of bits in the binary representation of |a|.
 */
int
big_bitlength(BIGNUM *a)
{
	int		l, b;
	BIG_CHUNK_TYPE	c;

	l = a->len - 1;
	while ((l > 0) && (a->value[l] == 0)) {
		l--;
	}
	b = BIG_CHUNK_SIZE;
	c = a->value[l];
	while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
		c = c << 1;
		b--;
	}

	return (l * BIG_CHUNK_SIZE + b);
}
/*
 * Copy BIGNUM src to dest, allocating memory if needed.
 */
BIG_ERR_CODE
big_copy(BIGNUM *dest, BIGNUM *src)
{
	BIG_CHUNK_TYPE	*newptr;
	int		i, len;

	len = src->len;
	while ((len > 1) && (src->value[len - 1] == 0)) {
		len--;
	}
	src->len = len;
	if (dest->size < len) {
		if (dest->malloced == 1) {
			newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
			    BIGNUM_WORDSIZE * dest->size,
			    BIGNUM_WORDSIZE * len);
		} else {
			newptr = (BIG_CHUNK_TYPE *)
			    big_malloc(BIGNUM_WORDSIZE * len);
			if (newptr != NULL) {
				dest->malloced = 1;
			}
		}
		if (newptr == NULL) {
			return (BIG_NO_MEM);
		}
		dest->value = newptr;
		dest->size = len;
	}
	dest->len = len;
	dest->sign = src->sign;
	for (i = 0; i < len; i++) {
		dest->value[i] = src->value[i];
	}

	return (BIG_OK);
}
/*
 * Allocate memory to extend BIGNUM number to size bignum chunks,
 * if not at least that size already.
 */
BIG_ERR_CODE
big_extend(BIGNUM *number, int size)
{
	BIG_CHUNK_TYPE	*newptr;
	int		i;

	if (number->size >= size)
		return (BIG_OK);
	if (number->malloced) {
		number->value = big_realloc(number->value,
		    BIGNUM_WORDSIZE * number->size,
		    BIGNUM_WORDSIZE * size);
	} else {
		newptr = big_malloc(BIGNUM_WORDSIZE * size);
		if (newptr != NULL) {
			for (i = 0; i < number->size; i++) {
				newptr[i] = number->value[i];
			}
		}
		number->value = newptr;
	}

	if (number->value == NULL) {
		return (BIG_NO_MEM);
	}
	number->size = size;
	number->malloced = 1;

	return (BIG_OK);
}
/* returns 1 if n == 0 */
int
big_is_zero(BIGNUM *n)
{
	int	i, result = 1;

	for (i = 0; i < n->len; i++) {
		if (n->value[i] != 0) {
			result = 0;
		}
	}
	return (result);
}
BIG_ERR_CODE
big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	int		i, shorter, longer;
	BIG_CHUNK_TYPE	cy, ai;
	BIG_CHUNK_TYPE	*r, *a, *b, *c;
	BIG_ERR_CODE	err;
	BIGNUM		*longerarg;

	if (aa->len > bb->len) {
		shorter = bb->len;
		longer = aa->len;
		longerarg = aa;
	} else {
		shorter = aa->len;
		longer = bb->len;
		longerarg = bb;
	}
	if (result->size < longer + 1) {
		err = big_extend(result, longer + 1);
		if (err != BIG_OK) {
			return (err);
		}
	}

	r = result->value;
	a = aa->value;
	b = bb->value;
	c = longerarg->value;
	cy = 0;
	for (i = 0; i < shorter; i++) {
		ai = a[i];
		r[i] = ai + b[i] + cy;
		if (r[i] > ai) {
			cy = 0;
		} else if (r[i] < ai) {
			cy = 1;
		}
	}
	for (; i < longer; i++) {
		ai = c[i];
		r[i] = ai + cy;
		if (r[i] >= ai) {
			cy = 0;
		}
	}
	if (cy == 1) {
		r[i] = cy;
		result->len = longer + 1;
	} else {
		result->len = longer;
	}
	result->sign = 1;

	return (BIG_OK);
}
/* caller must make sure that result has at least len words allocated */
void
big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
{
	int		i;
	BIG_CHUNK_TYPE	cy, ai;

	cy = 1;
	for (i = 0; i < len; i++) {
		ai = a[i];
		r[i] = ai + (~b[i]) + cy;
		if (r[i] > ai) {
			cy = 0;
		} else if (r[i] < ai) {
			cy = 1;
		}
	}
}
/* result = aa - bb; it is assumed that aa >= bb */
BIG_ERR_CODE
big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	int		i, shorter;
	BIG_CHUNK_TYPE	cy = 1, ai;
	BIG_CHUNK_TYPE	*r, *a, *b;
	BIG_ERR_CODE	err = BIG_OK;

	if (aa->len > bb->len) {
		shorter = bb->len;
	} else {
		shorter = aa->len;
	}
	if (result->size < aa->len) {
		err = big_extend(result, aa->len);
		if (err != BIG_OK) {
			return (err);
		}
	}

	r = result->value;
	a = aa->value;
	b = bb->value;
	result->len = aa->len;

	for (i = 0; i < shorter; i++) {
		ai = a[i];
		r[i] = ai + (~b[i]) + cy;
		if (r[i] > ai) {
			cy = 0;
		} else if (r[i] < ai) {
			cy = 1;
		}
	}
	for (; i < aa->len; i++) {
		ai = a[i];
		r[i] = ai + (~0) + cy;
		if (r[i] > ai) {
			cy = 0;
		} else if (r[i] < ai) {
			cy = 1;
		}
	}
	result->sign = 1;

	if (cy == 0) {
		return (BIG_INVALID_ARGS);
	} else {
		return (err);
	}
}
/* returns -1 if |aa| < |bb|, 0 if |aa| == |bb|, 1 if |aa| > |bb| */
int
big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
{
	int	i;

	if (aa->len > bb->len) {
		for (i = aa->len - 1; i > bb->len - 1; i--) {
			if (aa->value[i] > 0) {
				return (1);
			}
		}
	} else if (aa->len < bb->len) {
		for (i = bb->len - 1; i > aa->len - 1; i--) {
			if (bb->value[i] > 0) {
				return (-1);
			}
		}
	} else {
		i = aa->len - 1;
	}
	for (; i >= 0; i--) {
		if (aa->value[i] > bb->value[i]) {
			return (1);
		} else if (aa->value[i] < bb->value[i]) {
			return (-1);
		}
	}

	return (0);
}
BIG_ERR_CODE
big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	BIG_ERR_CODE	err;

	if ((bb->sign == -1) && (aa->sign == 1)) {
		if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
			return (err);
		}
		result->sign = 1;
	} else if ((aa->sign == -1) && (bb->sign == 1)) {
		if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
			return (err);
		}
		result->sign = -1;
	} else if ((aa->sign == 1) && (bb->sign == 1)) {
		if (big_cmp_abs(aa, bb) >= 0) {
			if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
				return (err);
			}
			result->sign = 1;
		} else {
			if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
				return (err);
			}
			result->sign = -1;
		}
	} else {
		if (big_cmp_abs(aa, bb) >= 0) {
			if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
				return (err);
			}
			result->sign = -1;
		} else {
			if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
				return (err);
			}
			result->sign = 1;
		}
	}

	return (BIG_OK);
}
BIG_ERR_CODE
big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	BIG_ERR_CODE	err;

	if ((bb->sign == -1) && (aa->sign == -1)) {
		if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
			return (err);
		}
		result->sign = -1;
	} else if ((aa->sign == 1) && (bb->sign == 1)) {
		if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
			return (err);
		}
		result->sign = 1;
	} else if ((aa->sign == 1) && (bb->sign == -1)) {
		if (big_cmp_abs(aa, bb) >= 0) {
			if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
				return (err);
			}
			result->sign = 1;
		} else {
			if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
				return (err);
			}
			result->sign = -1;
		}
	} else {
		if (big_cmp_abs(aa, bb) >= 0) {
			if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
				return (err);
			}
			result->sign = -1;
		} else {
			if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
				return (err);
			}
			result->sign = 1;
		}
	}

	return (BIG_OK);
}
/* result = aa / 2 */
BIG_ERR_CODE
big_half_pos(BIGNUM *result, BIGNUM *aa)
{
	int		i;
	BIG_ERR_CODE	err;
	BIG_CHUNK_TYPE	cy, cy1;
	BIG_CHUNK_TYPE	*a, *r;

	if (result->size < aa->len) {
		err = big_extend(result, aa->len);
		if (err != BIG_OK) {
			return (err);
		}
	}

	result->len = aa->len;
	a = aa->value;
	r = result->value;
	cy = 0;
	for (i = aa->len - 1; i >= 0; i--) {
		cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
		r[i] = (cy | (a[i] >> 1));
		cy = cy1;
	}
	if ((r[result->len - 1] == 0) && (result->len > 1)) {
		result->len--;
	}

	return (BIG_OK);
}
/* result = aa * 2 */
BIG_ERR_CODE
big_double(BIGNUM *result, BIGNUM *aa)
{
	int		i, rsize;
	BIG_ERR_CODE	err;
	BIG_CHUNK_TYPE	cy, cy1;
	BIG_CHUNK_TYPE	*a, *r;

	if ((aa->len > 0) &&
	    ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
		rsize = aa->len + 1;
	} else {
		rsize = aa->len;
	}

	if (result->size < rsize) {
		err = big_extend(result, rsize);
		if (err != BIG_OK) {
			return (err);
		}
	}

	a = aa->value;
	r = result->value;
	if (rsize == aa->len + 1) {
		r[rsize - 1] = 1;
	}
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
		r[i] = (cy | (a[i] << 1));
		cy = cy1;
	}
	result->len = rsize;

	return (BIG_OK);
}
/*
 * returns aa mod b, aa must be nonneg, b must be a max
 * (BIG_CHUNK_SIZE / 2)-bit integer
 */
static uint32_t
big_modhalf_pos(BIGNUM *aa, uint32_t b)
{
	int		i;
	BIG_CHUNK_TYPE	rem;

	if (aa->len == 0) {
		return (0);
	}
	rem = aa->value[aa->len - 1] % b;
	for (i = aa->len - 2; i >= 0; i--) {
		rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
		    (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
		rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
		    (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
	}

	return ((uint32_t)rem);
}
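/*
 * big_modhalf_pos() in action (illustrative only, hypothetical
 * BIGNUM_USAGE_EXAMPLE guard): trial division by a small odd prime,
 * the way the sieve in big_nextprime_pos_ext() uses it below.
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static int
example_divisible_by(BIGNUM *n, uint32_t p)	/* p < 2^(BIG_CHUNK_SIZE/2) */
{
	return (big_modhalf_pos(n, p) == 0);
}
#endif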
/*
 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
 * result->size should be at least aa->len at entry
 * aa, bb, and result should be positive
 */
static void
big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	int	i, lendiff;
	BIGNUM	res1, aa1;

	lendiff = aa->len - bb->len;
	res1.size = result->size - lendiff;
	res1.malloced = 0;
	res1.value = result->value + lendiff;
	aa1.size = aa->size - lendiff;
	aa1.value = aa->value + lendiff;
	aa1.len = bb->len;
	aa1.sign = 1;
	(void) big_sub_pos(&res1, &aa1, bb);
	if (result->value != aa->value) {
		for (i = 0; i < lendiff; i++) {
			result->value[i] = aa->value[i];
		}
	}
	result->len = aa->len;
}
/*
 * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
 * (2^BIG_CHUNK_SIZE)^lendiff * |bb|
 * aa->len should be >= bb->len
 */
static int
big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
{
	int	lendiff;
	BIGNUM	aa1;

	lendiff = aa->len - bb->len;
	aa1.len = bb->len;
	aa1.size = aa->size - lendiff;
	aa1.malloced = 0;
	aa1.value = aa->value + lendiff;

	return (big_cmp_abs(&aa1, bb));
}
/*
 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
 * result should have enough space allocated.
 */
static void
big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
{
	int		i;
	BIG_CHUNK_TYPE	t1, t2, ai, cy;
	BIG_CHUNK_TYPE	*a, *r;

	a = aa->value;
	r = result->value;
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		ai = a[i];
		t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
		t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
		    (t1 >> (BIG_CHUNK_SIZE / 2));
		r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
		    (t2 << (BIG_CHUNK_SIZE / 2));
		cy = t2 >> (BIG_CHUNK_SIZE / 2);
	}
	r[i] = cy;
	result->len = aa->len + 1;
	result->sign = aa->sign;
}
/*
 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
 * (BIG_CHUNK_SIZE / 2)-bit positive integer.
 * result should have enough space allocated.
 */
static void
big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
{
	int		i;
	BIG_CHUNK_TYPE	t1, t2, ai, cy, ri;
	BIG_CHUNK_TYPE	*a, *r;

	a = aa->value;
	r = result->value;
	cy = 0;
	ri = 0;
	for (i = 0; i < aa->len; i++) {
		ai = a[i];
		t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
		t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
		    (t1 >> (BIG_CHUNK_SIZE / 2));
		r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri;
		ri = t2 & BIG_CHUNK_LOWHALFBITS;
		cy = t2 >> (BIG_CHUNK_SIZE / 2);
	}
	r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri;
	result->len = aa->len + 1;
	result->sign = aa->sign;
}
/* it is assumed that result->size is big enough */
void
big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
{
	int		i;
	BIG_CHUNK_TYPE	cy, ai;

	if (offs == 0) {
		if (result != aa) {
			(void) big_copy(result, aa);
		}
		return;
	}
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		ai = aa->value[i];
		result->value[i] = (ai << offs) | cy;
		cy = ai >> (BIG_CHUNK_SIZE - offs);
	}
	if (cy != 0) {
		result->len = aa->len + 1;
		result->value[result->len - 1] = cy;
	} else {
		result->len = aa->len;
	}
	result->sign = aa->sign;
}
/* it is assumed that result->size is big enough */
void
big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
{
	int		i;
	BIG_CHUNK_TYPE	cy, ai;

	if (offs == 0) {
		if (result != aa) {
			(void) big_copy(result, aa);
		}
		return;
	}
	cy = aa->value[0] >> offs;
	for (i = 1; i < aa->len; i++) {
		ai = aa->value[i];
		result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
		cy = ai >> offs;
	}
	result->len = aa->len;
	result->value[result->len - 1] = cy;
	result->sign = aa->sign;
}
/*
 * result = aa / bb,  remainder = aa mod bb
 * it is assumed that aa and bb are positive
 */
BIG_ERR_CODE
big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
{
	BIG_ERR_CODE	err = BIG_OK;
	int		i, alen, blen, tlen, rlen, offs;
	BIG_CHUNK_TYPE	higha, highb, coeff;
	BIG_CHUNK_TYPE	*a, *b;
	BIGNUM		bbhigh, bblow, tresult, tmp1, tmp2;
	BIG_CHUNK_TYPE	tmp1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	tmp2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	tresultvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE	bblowvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE	bbhighvalue[BIGTMPSIZE];

	a = aa->value;
	b = bb->value;
	alen = aa->len;
	blen = bb->len;
	while ((alen > 1) && (a[alen - 1] == 0)) {
		alen--;
	}
	aa->len = alen;
	while ((blen > 1) && (b[blen - 1] == 0)) {
		blen--;
	}
	bb->len = blen;
	if ((blen == 1) && (b[0] == 0)) {
		return (BIG_DIV_BY_0);
	}

	if (big_cmp_abs(aa, bb) < 0) {
		if ((remainder != NULL) &&
		    ((err = big_copy(remainder, aa)) != BIG_OK)) {
			return (err);
		}
		if (result != NULL) {
			result->len = 1;
			result->sign = 1;
			result->value[0] = 0;
		}
		return (BIG_OK);
	}

	if ((err = big_init1(&bblow, blen + 1,
	    bblowvalue, arraysize(bblowvalue))) != BIG_OK)
		return (err);

	if ((err = big_init1(&bbhigh, blen + 1,
	    bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
		goto ret1;

	if ((err = big_init1(&tmp1, alen + 2,
	    tmp1value, arraysize(tmp1value))) != BIG_OK)
		goto ret2;

	if ((err = big_init1(&tmp2, blen + 2,
	    tmp2value, arraysize(tmp2value))) != BIG_OK)
		goto ret3;

	if ((err = big_init1(&tresult, alen - blen + 2,
	    tresultvalue, arraysize(tresultvalue))) != BIG_OK)
		goto ret4;

	offs = 0;
	highb = b[blen - 1];
	if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) {
		highb = highb >> (BIG_CHUNK_SIZE / 2);
		offs = (BIG_CHUNK_SIZE / 2);
	}
	while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) {
		highb = highb << 1;
		offs++;
	}

	big_shiftleft(&bblow, bb, offs);

	if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) {
		big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
	} else {
		big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
	}
	if (bbhigh.value[bbhigh.len - 1] == 0) {
		bbhigh.len--;
	} else {
		bbhigh.value[bbhigh.len] = 0;
	}

	highb = bblow.value[bblow.len - 1];

	big_shiftleft(&tmp1, aa, offs);
	rlen = tmp1.len - bblow.len + 1;
	tresult.len = rlen;

	tmp1.len++;
	tlen = tmp1.len;
	tmp1.value[tmp1.len - 1] = 0;
	for (i = 0; i < rlen; i++) {
		higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) +
		    (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2));
		coeff = higha / (highb + 1);
		big_mulhalf_high(&tmp2, &bblow, coeff);
		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
		bbhigh.len++;
		while (tmp1.value[tlen - 1] > 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
			coeff++;
		}
		bbhigh.len--;
		tlen--;
		tmp1.len--;
		while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
			coeff++;
		}
		tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2);
		higha = tmp1.value[tlen - 1];
		coeff = higha / (highb + 1);
		big_mulhalf_low(&tmp2, &bblow, coeff);
		tmp2.len--;
		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
		while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bblow);
			coeff++;
		}
		tresult.value[rlen - i - 1] =
		    tresult.value[rlen - i - 1] + coeff;
	}

	big_shiftright(&tmp1, &tmp1, offs);

	err = BIG_OK;

	if ((remainder != NULL) &&
	    ((err = big_copy(remainder, &tmp1)) != BIG_OK))
		goto ret;

	if (result != NULL)
		err = big_copy(result, &tresult);

ret:
	big_finish(&tresult);
ret4:
	big_finish(&tmp2);
ret3:
	big_finish(&tmp1);
ret2:
	big_finish(&bbhigh);
ret1:
	big_finish(&bblow);

	return (err);
}
/*
 * If there is no processor-specific integer implementation of
 * the lower level multiply functions, then this code is provided
 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
 * big_sqr_vec().
 *
 * There are two generic implementations.  One that assumes that
 * there is hardware and C compiler support for a 32 x 32 --> 64
 * bit unsigned multiply, but otherwise is not specific to any
 * processor, platform, or ISA.
 *
 * The other makes very few assumptions about hardware capabilities.
 * It does not even assume that there is any implementation of a
 * 32 x 32 --> 64 bit multiply that is accessible to C code and
 * appropriate to use.  It instead falls back to constructing
 * 32 x 32 --> 64 bit multiplies from 16 x 16 --> 32 bit multiplies.
 */
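/*
 * For reference, the decomposition the no-UMUL64 fallback relies on
 * (a minimal sketch, not part of the original file): with
 * a = ah * 2^16 + al and a 16-bit digit b, a * b equals
 * (ah * b) << 16 plus al * b, and each partial product fits in 32 bits.
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static uint32_t
example_mul_16x16(uint32_t a, uint32_t b16)	/* b16 < 2^16 */
{
	uint32_t	lo = (a & 0xffff) * b16;	/* al * b */
	uint32_t	hi = (a >> 16) * b16;		/* ah * b */

	/* low 32 bits of a * b16; the high half of hi carries out */
	return (lo + (hi << 16));
}
#endif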
#if !defined(PSR_MUL)

#ifdef UMUL64

#if (BIG_CHUNK_SIZE == 32)

#define	UNROLL8

#define	MUL_SET_VEC_ROUND_PREFETCH(R) \
	p = pf * d; \
	pf = (uint64_t)a[R + 1]; \
	t = p + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#define	MUL_SET_VEC_ROUND_NOPREFETCH(R) \
	p = pf * d; \
	t = p + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#define	MUL_ADD_VEC_ROUND_PREFETCH(R) \
	t = (uint64_t)r[R]; \
	p = pf * d; \
	pf = (uint64_t)a[R + 1]; \
	t = p + t + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#define	MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
	t = (uint64_t)r[R]; \
	p = pf * d; \
	t = p + t + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32
#ifdef UNROLL8

#define	UNROLL	8

/*
 * r = a * b
 * where r and a are vectors; b is a single 32-bit digit
 */
uint32_t
big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
{
	uint64_t d, pf, p, t, cy;

	if (len == 0)
		return (0);
	cy = 0;
	d = (uint64_t)b;
	pf = (uint64_t)a[0];
	while (len > UNROLL) {
		MUL_SET_VEC_ROUND_PREFETCH(0);
		MUL_SET_VEC_ROUND_PREFETCH(1);
		MUL_SET_VEC_ROUND_PREFETCH(2);
		MUL_SET_VEC_ROUND_PREFETCH(3);
		MUL_SET_VEC_ROUND_PREFETCH(4);
		MUL_SET_VEC_ROUND_PREFETCH(5);
		MUL_SET_VEC_ROUND_PREFETCH(6);
		MUL_SET_VEC_ROUND_PREFETCH(7);
		r += UNROLL;
		a += UNROLL;
		len -= UNROLL;
	}
	if (len == UNROLL) {
		MUL_SET_VEC_ROUND_PREFETCH(0);
		MUL_SET_VEC_ROUND_PREFETCH(1);
		MUL_SET_VEC_ROUND_PREFETCH(2);
		MUL_SET_VEC_ROUND_PREFETCH(3);
		MUL_SET_VEC_ROUND_PREFETCH(4);
		MUL_SET_VEC_ROUND_PREFETCH(5);
		MUL_SET_VEC_ROUND_PREFETCH(6);
		MUL_SET_VEC_ROUND_NOPREFETCH(7);
		return ((uint32_t)cy);
	}
	while (len > 1) {
		MUL_SET_VEC_ROUND_PREFETCH(0);
		++r;
		++a;
		--len;
	}
	if (len > 0) {
		MUL_SET_VEC_ROUND_NOPREFETCH(0);
	}
	return ((uint32_t)cy);
}
/*
 * r = r + a * b
 * where r and a are vectors; b is a single 32-bit digit
 */
uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
{
	uint64_t d, pf, p, t, cy;

	if (len == 0)
		return (0);
	cy = 0;
	d = (uint64_t)b;
	pf = (uint64_t)a[0];
	while (len > UNROLL) {
		MUL_ADD_VEC_ROUND_PREFETCH(0);
		MUL_ADD_VEC_ROUND_PREFETCH(1);
		MUL_ADD_VEC_ROUND_PREFETCH(2);
		MUL_ADD_VEC_ROUND_PREFETCH(3);
		MUL_ADD_VEC_ROUND_PREFETCH(4);
		MUL_ADD_VEC_ROUND_PREFETCH(5);
		MUL_ADD_VEC_ROUND_PREFETCH(6);
		MUL_ADD_VEC_ROUND_PREFETCH(7);
		r += UNROLL;
		a += UNROLL;
		len -= UNROLL;
	}
	if (len == UNROLL) {
		MUL_ADD_VEC_ROUND_PREFETCH(0);
		MUL_ADD_VEC_ROUND_PREFETCH(1);
		MUL_ADD_VEC_ROUND_PREFETCH(2);
		MUL_ADD_VEC_ROUND_PREFETCH(3);
		MUL_ADD_VEC_ROUND_PREFETCH(4);
		MUL_ADD_VEC_ROUND_PREFETCH(5);
		MUL_ADD_VEC_ROUND_PREFETCH(6);
		MUL_ADD_VEC_ROUND_NOPREFETCH(7);
		return ((uint32_t)cy);
	}
	while (len > 1) {
		MUL_ADD_VEC_ROUND_PREFETCH(0);
		++r;
		++a;
		--len;
	}
	if (len > 0) {
		MUL_ADD_VEC_ROUND_NOPREFETCH(0);
	}
	return ((uint32_t)cy);
}

#endif /* UNROLL8 */
/* r = a * a; r must have room for 2 * len digits */
void
big_sqr_vec(uint32_t *r, uint32_t *a, int len)
{
	uint32_t	*tr, *ta;
	int		tlen, row, col;
	uint64_t	p, s, t, t2, cy;
	uint32_t	d;

	tr = r + 1;
	ta = a;
	tlen = len - 1;
	tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
	while (--tlen > 0) {
		tr += 2;
		++ta;
		tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
	}
	s = (uint64_t)a[0];
	s = s * s;
	r[0] = (uint32_t)s;
	cy = s >> 32;
	p = ((uint64_t)r[1] << 1) + cy;
	r[1] = (uint32_t)p;
	cy = p >> 32;
	row = 1;
	col = 2;
	while (row < len) {
		s = (uint64_t)a[row];
		s = s * s;
		p = (uint64_t)r[col] << 1;
		t = p + s;
		d = (uint32_t)t;
		t2 = (uint64_t)d + cy;
		r[col] = (uint32_t)t2;
		cy = (t >> 32) + (t2 >> 32);
		if (row == len - 1)
			break;
		p = ((uint64_t)r[col + 1] << 1) + cy;
		r[col + 1] = (uint32_t)p;
		cy = p >> 32;
		++row;
		col += 2;
	}
	r[col + 1] = (uint32_t)cy;
}
#else /* BIG_CHUNK_SIZE == 64 */

/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 */
BIG_CHUNK_TYPE
big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
    BIG_CHUNK_TYPE digit)
{
	BIG_CHUNK_TYPE	cy, cy1, retcy, dlow, dhigh;
	int		i;

	cy1 = 0;
	dlow = digit & BIG_CHUNK_LOWHALFBITS;
	dhigh = digit >> (BIG_CHUNK_SIZE / 2);
	for (i = 0; i < len; i++) {
		cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
		    dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
		    (r[i] & BIG_CHUNK_LOWHALFBITS);
		cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
		    dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
		    (r[i] >> (BIG_CHUNK_SIZE / 2));
		r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
		    (cy1 << (BIG_CHUNK_SIZE / 2));
	}
	retcy = cy1 >> (BIG_CHUNK_SIZE / 2);

	cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
	for (i = 0; i < len - 1; i++) {
		cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
		    dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
		    (r[i] >> (BIG_CHUNK_SIZE / 2));
		r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
		    (cy << (BIG_CHUNK_SIZE / 2));
		cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
		    dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
		    (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
	}
	cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
	    dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
	    (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
	r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
	    (cy << (BIG_CHUNK_SIZE / 2));
	retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
	    dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;

	return (retcy);
}
/*
 * r = a * digit, r and a are vectors of length len
 * returns the carry digit
 */
BIG_CHUNK_TYPE
big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
    BIG_CHUNK_TYPE digit)
{
	int	i;

	for (i = 0; i < len; i++) {
		r[i] = 0;
	}

	return (big_mul_add_vec(r, a, len, digit));
}

void
big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
{
	int	i;

	r[len] = big_mul_set_vec(r, a, len, a[0]);
	for (i = 1; i < len; ++i)
		r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
}

#endif /* BIG_CHUNK_SIZE == 32/64 */
#else /* ! UMUL64 */

#if (BIG_CHUNK_SIZE != 32)
#error "Don't use 64-bit chunks without defining UMUL64"
#endif

/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 */
uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
{
	uint32_t	cy, cy1, retcy, dlow, dhigh;
	int		i;

	cy1 = 0;
	dlow = digit & 0xffff;
	dhigh = digit >> 16;
	for (i = 0; i < len; i++) {
		cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
		cy1 = (cy >> 16) + dlow * (a[i] >> 16) + (r[i] >> 16);
		r[i] = (cy & 0xffff) | (cy1 << 16);
	}
	retcy = cy1 >> 16;

	cy1 = r[0] & 0xffff;
	for (i = 0; i < len - 1; i++) {
		cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
		r[i] = (cy1 & 0xffff) | (cy << 16);
		cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
	}
	cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
	r[len - 1] = (cy1 & 0xffff) | (cy << 16);
	retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;

	return (retcy);
}
/*
 * r = a * digit, r and a are vectors of length len
 * returns the carry digit
 */
uint32_t
big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
{
	int	i;

	for (i = 0; i < len; i++) {
		r[i] = 0;
	}

	return (big_mul_add_vec(r, a, len, digit));
}

void
big_sqr_vec(uint32_t *r, uint32_t *a, int len)
{
	int	i;

	r[len] = big_mul_set_vec(r, a, len, a[0]);
	for (i = 1; i < len; ++i)
		r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
}

#endif /* UMUL64 */

void
big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
    BIG_CHUNK_TYPE *b, int blen)
{
	int	i;

	r[alen] = big_mul_set_vec(r, a, alen, b[0]);
	for (i = 1; i < blen; ++i)
		r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]);
}

#endif /* ! PSR_MUL */
/*
 * result = aa * bb   result->value should be big enough to hold the result
 *
 * Implementation: Standard grammar school algorithm
 */
BIG_ERR_CODE
big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	BIGNUM		tmp1;
	BIG_CHUNK_TYPE	tmp1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	*r, *t, *a, *b;
	BIG_ERR_CODE	err;
	int		i, alen, blen, rsize, sign, diff;

	diff = big_cmp_abs(aa, bb);
	a = aa->value;
	b = bb->value;
	alen = aa->len;
	blen = bb->len;
	while ((alen > 1) && (a[alen - 1] == 0)) {
		alen--;
	}
	aa->len = alen;
	while ((blen > 1) && (b[blen - 1] == 0)) {
		blen--;
	}
	bb->len = blen;

	rsize = alen + blen;
	if (result->size < rsize) {
		err = big_extend(result, rsize);
		if (err != BIG_OK) {
			return (err);
		}
		/* aa or bb might be an alias to result */
		a = aa->value;
		b = bb->value;
	}
	r = result->value;

	if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
		result->len = 1;
		result->sign = 1;
		r[0] = 0;
		return (BIG_OK);
	}
	sign = aa->sign * bb->sign;
	if ((alen == 1) && (a[0] == 1)) {
		for (i = 0; i < blen; i++) {
			r[i] = b[i];
		}
		result->len = blen;
		result->sign = sign;
		return (BIG_OK);
	}
	if ((blen == 1) && (b[0] == 1)) {
		for (i = 0; i < alen; i++) {
			r[i] = a[i];
		}
		result->len = alen;
		result->sign = sign;
		return (BIG_OK);
	}

	if ((err = big_init1(&tmp1, rsize,
	    tmp1value, arraysize(tmp1value))) != BIG_OK) {
		return (err);
	}
	t = tmp1.value;
	for (i = 0; i < rsize; i++) {
		t[i] = 0;
	}

	if (diff == 0 && alen > 2) {
		BIG_SQR_VEC(t, a, alen);
	} else if (blen > 0) {
		BIG_MUL_VEC(t, a, alen, b, blen);
	}

	if (t[rsize - 1] == 0) {
		tmp1.len = rsize - 1;
	} else {
		tmp1.len = rsize;
	}
	err = big_copy(result, &tmp1);

	result->sign = sign;

	big_finish(&tmp1);

	return (err);
}
/*
 * Montgomery multiplication.
 *
 * Caller must ensure that a < n, b < n, ret->size >= 2 * n->len + 1,
 * and that ret is not n.
 */
BIG_ERR_CODE
big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
{
	int		i, j, nlen, needsubtract;
	BIG_CHUNK_TYPE	*nn, *rr, *rrplusi;
	BIG_CHUNK_TYPE	digit, c;
	BIG_ERR_CODE	err;
#ifdef	__amd64
#define	BIG_CPU_UNKNOWN	0
#define	BIG_CPU_AMD	1
#define	BIG_CPU_INTEL	2
	static int	big_cpu = BIG_CPU_UNKNOWN;
	BIG_CHUNK_TYPE	carry[BIGTMPSIZE];

	if (big_cpu == BIG_CPU_UNKNOWN) {
		big_cpu = 1 + bignum_on_intel();
	}
#endif	/* __amd64 */

	nlen = n->len;
	nn = n->value;

	if ((err = big_mul(ret, a, b)) != BIG_OK) {
		return (err);
	}

	rr = ret->value;
	for (i = ret->len; i < 2 * nlen + 1; i++) {
		rr[i] = 0;
	}

#ifdef __amd64 /* pipelining optimization for Intel 64, but not AMD64 */
	if ((big_cpu == BIG_CPU_INTEL) && (nlen <= BIGTMPSIZE)) {
		/*
		 * Perform the following in two for loops to reduce the
		 * dependency between computing the carryover bits with
		 * BIG_MUL_ADD_VEC() and adding them, thus improving pipelining.
		 */
		for (i = 0; i < nlen; i++) {
			rrplusi = rr + i;
			digit = *rrplusi * n0;
			carry[i] = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit);
		}
		for (i = 0; i < nlen; i++) {
			j = i + nlen;
			rr[j] += carry[i];
			while (rr[j] < carry[i]) {
				j++;
				carry[i] = 1;
				rr[j] += 1;
			}
		}
	} else
#endif	/* __amd64 */
	{ /* no pipelining optimization */
		for (i = 0; i < nlen; i++) {
			rrplusi = rr + i;
			digit = *rrplusi * n0;
			c = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit);
			j = i + nlen;
			rr[j] += c;
			while (rr[j] < c) {
				rr[j + 1] += 1;
				j++;
				c = 1;
			}
		}
	}

	needsubtract = 0;
	if ((rr[2 * nlen] != 0))
		needsubtract = 1;
	else {
		for (i = 2 * nlen - 1; i >= nlen; i--) {
			if (rr[i] > nn[i - nlen]) {
				needsubtract = 1;
				break;
			} else if (rr[i] < nn[i - nlen]) {
				break;
			}
		}
	}
	if (needsubtract)
		big_sub_vec(rr, rr + nlen, nn, nlen);
	else {
		for (i = 0; i < nlen; i++) {
			rr[i] = rr[i + nlen];
		}
	}

	/* Remove leading zeros, but keep at least 1 digit: */
	for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--)
		;
	ret->len = i + 1;
	ret->sign = 1;

	return (BIG_OK);
}
/*
 * Compute the Montgomery constant n0 = -n^(-1) mod 2^BIG_CHUNK_SIZE
 * (n must be odd).
 */
BIG_CHUNK_TYPE
big_n0(BIG_CHUNK_TYPE n)
{
	int		i;
	BIG_CHUNK_TYPE	result, tmp;

	result = 0;
	tmp = BIG_CHUNK_ALLBITS;
	for (i = 0; i < BIG_CHUNK_SIZE; i++) {
		if ((tmp & 1) == 1) {
			result = (result >> 1) | BIG_CHUNK_HIGHBIT;
			tmp = tmp - n;
		} else {
			result = (result >> 1);
		}
		tmp = tmp >> 1;
	}

	return (result);
}
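/*
 * Sketch of the property big_n0() provides (illustrative only,
 * assuming n0 is the standard Montgomery constant -n^(-1) mod
 * 2^BIG_CHUNK_SIZE for odd n): n * n0 + 1 wraps to 0 in
 * BIG_CHUNK_TYPE arithmetic, which is what lets Montgomery reduction
 * zero the low word of the product one digit at a time.
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static int
example_check_n0(BIG_CHUNK_TYPE n)	/* n must be odd */
{
	BIG_CHUNK_TYPE	n0 = big_n0(n);

	/* unsigned overflow is well defined: expect wraparound to 0 */
	return ((BIG_CHUNK_TYPE)(n * n0 + 1) == 0);
}
#endif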
int
big_numbits(BIGNUM *n)
{
	int		i, j;
	BIG_CHUNK_TYPE	t;

	for (i = n->len - 1; i > 0; i--) {
		if (n->value[i] != 0) {
			break;
		}
	}
	t = n->value[i];
	for (j = BIG_CHUNK_SIZE; j > 0; j--) {
		if ((t & BIG_CHUNK_HIGHBIT) == 0) {
			t = t << 1;
		} else {
			return (BIG_CHUNK_SIZE * i + j);
		}
	}

	return (0);
}
/* caller must make sure that a < n */
BIG_ERR_CODE
big_mont_rr(BIGNUM *result, BIGNUM *n)
{
	BIGNUM		rr;
	BIG_CHUNK_TYPE	rrvalue[BIGTMPSIZE];
	int		len, i;
	BIG_ERR_CODE	err;

	rr.malloced = 0;
	len = n->len;

	if ((err = big_init1(&rr, 2 * len + 1,
	    rrvalue, arraysize(rrvalue))) != BIG_OK) {
		return (err);
	}

	for (i = 0; i < 2 * len; i++) {
		rr.value[i] = 0;
	}
	rr.value[2 * len] = 1;
	rr.len = 2 * len + 1;
	if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
		goto ret;
	}
	err = big_copy(result, &rr);
ret:
	big_finish(&rr);

	return (err);
}
/* caller must make sure that a < n */
BIG_ERR_CODE
big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
    BIGNUM *n_rr)
{
	BIGNUM		rr;
	BIG_CHUNK_TYPE	rrvalue[BIGTMPSIZE];
	int		len, i;
	BIG_ERR_CODE	err;

	rr.malloced = 0;
	len = n->len;

	if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
	    != BIG_OK) {
		return (err);
	}

	if (n_rr == NULL) {
		for (i = 0; i < 2 * len; i++) {
			rr.value[i] = 0;
		}
		rr.value[2 * len] = 1;
		rr.len = 2 * len + 1;
		if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
			goto ret;
		}
		n_rr = &rr;
	}

	if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
		goto ret;
	}
	err = big_copy(result, &rr);
ret:
	big_finish(&rr);

	return (err);
}
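/*
 * A minimal sketch of the whole Montgomery pipeline these helpers form
 * (illustrative only; assumes n odd, a < n, b < n).  Operands enter
 * the domain with big_mont_conv(), multiply there with big_mont_mul(),
 * and leave it by one more big_mont_mul() with the plain value 1 --
 * the same trick big_modexp_ext() uses below.
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static BIG_ERR_CODE
example_mont_mulmod(BIGNUM *result, BIGNUM *a, BIGNUM *b, BIGNUM *n)
{
	BIGNUM		ma, mb, tmp, one;
	BIG_CHUNK_TYPE	onevalue[1];
	BIG_CHUNK_TYPE	n0 = big_n0(n->value[0]);
	BIG_ERR_CODE	err;

	one.value = onevalue;
	one.size = 1;
	one.len = 1;
	one.sign = 1;
	one.malloced = 0;
	one.value[0] = 1;

	if ((err = big_init(&ma, n->len)) != BIG_OK)
		return (err);
	if ((err = big_init(&mb, n->len)) != BIG_OK)
		goto ret1;
	if ((err = big_init(&tmp, 2 * n->len + 1)) != BIG_OK)
		goto ret2;

	/* enter the domain; NULL n_rr makes big_mont_conv() compute R^2 */
	if ((err = big_mont_conv(&ma, a, n, n0, NULL)) != BIG_OK)
		goto ret;
	if ((err = big_mont_conv(&mb, b, n, n0, NULL)) != BIG_OK)
		goto ret;
	/* tmp = a * b * R mod n, still in the Montgomery domain */
	if ((err = big_mont_mul(&tmp, &ma, &mb, n, n0)) != BIG_OK)
		goto ret;
	/* multiplying by 1 strips the factor of R and leaves the domain */
	if ((err = big_mont_mul(&tmp, &tmp, &one, n, n0)) != BIG_OK)
		goto ret;
	err = big_copy(result, &tmp);
ret:
	big_finish(&tmp);
ret2:
	big_finish(&mb);
ret1:
	big_finish(&ma);
	return (err);
}
#endif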
#ifdef USE_FLOATING_POINT
#define	big_modexp_ncp_float	big_modexp_ncp_sw
#else
#define	big_modexp_ncp_int	big_modexp_ncp_sw
#endif

#define	MAX_EXP_BIT_GROUP_SIZE	6
#define	APOWERS_MAX_SIZE	(1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
BIG_ERR_CODE
big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)
{
	BIGNUM		apowers[APOWERS_MAX_SIZE];
	BIGNUM		tmp1;
	BIG_CHUNK_TYPE	tmp1value[BIGTMPSIZE];
	int		i, j, k, l, m, p;
	uint32_t	bit, bitind, bitcount, groupbits, apowerssize;
	uint32_t	nbits;
	BIG_ERR_CODE	err;

	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}

	if ((err = big_init1(&tmp1, 2 * n->len + 1,
	    tmp1value, arraysize(tmp1value))) != BIG_OK) {
		return (err);
	}

	/* clear the malloced bit to help cleanup */
	for (i = 0; i < apowerssize; i++) {
		apowers[i].malloced = 0;
	}
	for (i = 0; i < apowerssize; i++) {
		if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
		    BIG_OK) {
			goto ret;
		}
	}

	(void) big_copy(&(apowers[0]), ma);

	if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
		goto ret;
	}
	(void) big_copy(ma, &tmp1);

	for (i = 1; i < apowerssize; i++) {
		if ((err = big_mont_mul(&tmp1, ma,
		    &(apowers[i - 1]), n, n0)) != BIG_OK) {
			goto ret;
		}
		(void) big_copy(&apowers[i], &tmp1);
	}

	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				if ((err = big_mont_mul(tmp,
				    tmp, tmp, n, n0)) != BIG_OK) {
					goto ret;
				}
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					for (m = 0; m < k; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					if ((err = big_mont_mul(tmp, tmp,
					    &(apowers[p >> (l + 1)]),
					    n, n0)) != BIG_OK) {
						goto ret;
					}
					for (m = 0; m < l; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	if (bitcount != 0) {
		for (m = 0; m < k; m++) {
			if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) !=
			    BIG_OK) {
				goto ret;
			}
		}
		if ((err = big_mont_mul(tmp, tmp,
		    &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
			goto ret;
		}
		for (m = 0; m < l; m++) {
			if ((err = big_mont_mul(result, tmp, tmp, n, n0)) !=
			    BIG_OK) {
				goto ret;
			}
		}
	}

ret:
	for (i = apowerssize - 1; i >= 0; i--) {
		big_finish(&(apowers[i]));
	}
	big_finish(&tmp1);

	return (err);
}
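/*
 * The windowing above buffers up to MAX_EXP_BIT_GROUP_SIZE exponent
 * bits, squares once per bit, and multiplies by a precomputed odd
 * power (apowers[i] holds ma^(2i+1) in Montgomery form).  A scalar
 * sketch of the underlying square-and-multiply idea, on plain
 * integers with 1-bit windows for brevity (illustrative only):
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static uint64_t
example_square_and_multiply(uint64_t base, uint64_t e, uint64_t m)
{
	uint64_t	acc = 1;
	int		i;

	/* m must be below 2^32 so the products cannot overflow */
	base %= m;
	for (i = 63; i >= 0; i--) {
		acc = (acc * acc) % m;		/* square for every bit */
		if ((e >> i) & 1)
			acc = (acc * base) % m;	/* multiply on set bits */
	}
	return (acc);
}
#endif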
#ifdef USE_FLOATING_POINT

#ifdef _KERNEL

#include <sys/sysmacros.h>
#include <sys/regset.h>
#include <sys/fpu/fpusystm.h>

/* the alignment for block stores to save fp registers */
#define	FPR_ALIGN	(64)

extern void big_savefp(kfpu_t *);
extern void big_restorefp(kfpu_t *);

#endif /* _KERNEL */
/*
 * This version makes use of floating point for performance
 */
BIG_ERR_CODE
big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)
{
	int		i, j, k, l, m, p;
	uint32_t	bit, bitind, bitcount, nlen;
	double		dn0;
	double		*dn, *dt, *d16r, *d32r;
	uint32_t	*nint, *prod;
	double		*apowers[APOWERS_MAX_SIZE];
	uint32_t	nbits, groupbits, apowerssize;
	BIG_ERR_CODE	err = BIG_OK;

#ifdef _KERNEL
	uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
	kfpu_t *fpu;

	if (fpu_exists == 0) {
		return (BIG_GENERAL_ERR);
	}
	fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
	big_savefp(fpu);
#endif /* _KERNEL */

	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}

	nlen = (BIG_CHUNK_SIZE / 32) * n->len;
	dn0 = (double)(n0 & 0xffff);

	dn = dt = d16r = d32r = NULL;
	nint = prod = NULL;
	for (i = 0; i < apowerssize; i++) {
		apowers[i] = NULL;
	}

	if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	for (i = 0; i < apowerssize; i++) {
		if ((apowers[i] = big_malloc((2 * nlen + 1) *
		    sizeof (double))) == NULL) {
			err = BIG_NO_MEM;
			goto ret;
		}
	}

#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < ma->len; i++) {
		nint[i] = ma->value[i];
	}
	for (; i < nlen; i++) {
		nint[i] = 0;
	}
#else
	for (i = 0; i < ma->len; i++) {
		nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL);
		nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32);
	}
	for (i = ma->len * 2; i < nlen; i++) {
		nint[i] = 0;
	}
#endif
	conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);

#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < n->len; i++) {
		nint[i] = n->value[i];
	}
	for (; i < nlen; i++) {
		nint[i] = 0;
	}
#else
	for (i = 0; i < n->len; i++) {
		nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL);
		nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32);
	}
	for (i = n->len * 2; i < nlen; i++) {
		nint[i] = 0;
	}
#endif
	conv_i32_to_d32(dn, nint, nlen);

	mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
	conv_i32_to_d32(d32r, prod, nlen);
	for (i = 1; i < apowerssize; i++) {
		mont_mulf_noconv(prod, d32r, apowers[i - 1],
		    dt, dn, nint, nlen, dn0);
		conv_i32_to_d16(apowers[i], prod, nlen);
	}

#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < tmp->len; i++) {
		prod[i] = tmp->value[i];
	}
	for (; i < nlen + 1; i++) {
		prod[i] = 0;
	}
#else
	for (i = 0; i < tmp->len; i++) {
		prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL);
		prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32);
	}
	for (i = tmp->len * 2; i < nlen + 1; i++) {
		prod[i] = 0;
	}
#endif

	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				conv_i32_to_d32_and_d16(d32r, d16r,
				    prod, nlen);
				mont_mulf_noconv(prod, d32r, d16r,
				    dt, dn, nint, nlen, dn0);
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					for (m = 0; m < k; m++) {
						conv_i32_to_d32_and_d16(d32r,
						    d16r, prod, nlen);
						mont_mulf_noconv(prod, d32r,
						    d16r, dt, dn, nint,
						    nlen, dn0);
					}
					conv_i32_to_d32(d32r, prod, nlen);
					mont_mulf_noconv(prod, d32r,
					    apowers[p >> (l + 1)],
					    dt, dn, nint, nlen, dn0);
					for (m = 0; m < l; m++) {
						conv_i32_to_d32_and_d16(d32r,
						    d16r, prod, nlen);
						mont_mulf_noconv(prod, d32r,
						    d16r, dt, dn, nint,
						    nlen, dn0);
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	if (bitcount != 0) {
		for (m = 0; m < k; m++) {
			conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
			mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint,
			    nlen, dn0);
		}
		conv_i32_to_d32(d32r, prod, nlen);
		mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
		    dt, dn, nint, nlen, dn0);
		for (m = 0; m < l; m++) {
			conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
			mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint,
			    nlen, dn0);
		}
	}

#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < nlen; i++) {
		result->value[i] = prod[i];
	}
	for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--)
		;
	result->len = i + 1;
#else
	for (i = 0; i < nlen / 2; i++) {
		result->value[i] = (uint64_t)(prod[2 * i]) +
		    (((uint64_t)(prod[2 * i + 1])) << 32);
	}
	for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--)
		;
	result->len = i + 1;
#endif

ret:
	for (i = apowerssize - 1; i >= 0; i--) {
		if (apowers[i] != NULL)
			big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
	}
	if (d32r != NULL) {
		big_free(d32r, nlen * sizeof (double));
	}
	if (d16r != NULL) {
		big_free(d16r, (2 * nlen + 1) * sizeof (double));
	}
	if (prod != NULL) {
		big_free(prod, (nlen + 1) * sizeof (uint32_t));
	}
	if (nint != NULL) {
		big_free(nint, nlen * sizeof (uint32_t));
	}
	if (dt != NULL) {
		big_free(dt, (4 * nlen + 2) * sizeof (double));
	}
	if (dn != NULL) {
		big_free(dn, nlen * sizeof (double));
	}

#ifdef _KERNEL
	big_restorefp(fpu);
#endif

	return (err);
}

#endif /* USE_FLOATING_POINT */
BIG_ERR_CODE
big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr,
    big_modexp_ncp_info_t *info)
{
	BIGNUM		ma, tmp, rr;
	BIG_CHUNK_TYPE	mavalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE	tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE	rrvalue[BIGTMPSIZE];
	BIG_ERR_CODE	err;
	BIG_CHUNK_TYPE	n0;

	if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) !=
	    BIG_OK) {
		return (err);
	}
	ma.len = 1;
	ma.value[0] = 0;

	if ((err = big_init1(&tmp, 2 * n->len + 1,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
		goto ret1;
	}

	/* clear the malloced bit to help cleanup */
	rr.malloced = 0;

	if (n_rr == NULL) {
		if ((err = big_init1(&rr, 2 * n->len + 1,
		    rrvalue, arraysize(rrvalue))) != BIG_OK) {
			goto ret2;
		}
		if (big_mont_rr(&rr, n) != BIG_OK) {
			goto ret;
		}
		n_rr = &rr;
	}

	n0 = big_n0(n->value[0]);

	if (big_cmp_abs(a, n) > 0) {
		if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
			goto ret;
		}
		err = big_mont_conv(&ma, &ma, n, n0, n_rr);
	} else {
		err = big_mont_conv(&ma, a, n, n0, n_rr);
	}
	if (err != BIG_OK) {
		goto ret;
	}

	tmp.len = 1;
	tmp.value[0] = 1;
	if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) {
		goto ret;
	}

	if ((info != NULL) && (info->func != NULL)) {
		err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0,
		    info->ncp, info->reqp);
	} else {
		err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0);
	}
	if (err != BIG_OK) {
		goto ret;
	}

	ma.value[0] = 1;
	ma.len = 1;
	if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) {
		goto ret;
	}
	err = big_copy(result, &tmp);

ret:
	if (rr.malloced) {
		big_finish(&rr);
	}
ret2:
	big_finish(&tmp);
ret1:
	big_finish(&ma);

	return (err);
}
*result
, BIGNUM
*a
, BIGNUM
*e
, BIGNUM
*n
, BIGNUM
*n_rr
)
2386 return (big_modexp_ext(result
, a
, e
, n
, n_rr
, NULL
));
BIG_ERR_CODE
big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
    BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
    BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info)
{
	BIGNUM		ap, aq, tmp;
	int		alen, biglen, sign;
	BIG_ERR_CODE	err;

	if (p->len > q->len) {
		biglen = p->len;
	} else {
		biglen = q->len;
	}

	if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) {
		goto ret2;
	}

	/*
	 * check whether a is too short - to avoid timing attacks
	 */
	alen = a->len;
	while ((alen > p->len) && (a->value[alen - 1] == 0)) {
		alen--;
	}
	if (alen < p->len + q->len) {
		/*
		 * a is too short, add p*q to it before
		 * taking it modulo p and q
		 * this will also affect timing, but this difference
		 * does not depend on p or q, only on a
		 * (in "normal" operation, this path will never be
		 * taken, so it is not a performance penalty)
		 */
		if ((err = big_mul(&tmp, p, q)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
			goto ret;
		}
	} else {
		if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) {
			goto ret;
		}
	}

	if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) !=
	    BIG_OK) {
		goto ret;
	}
	if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) !=
	    BIG_OK) {
		goto ret;
	}
	if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) {
		goto ret;
	}
	if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) {
		goto ret;
	}
	sign = tmp.sign;
	tmp.sign = 1;
	if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
		goto ret;
	}
	if ((sign == -1) && (!big_is_zero(&aq))) {
		(void) big_sub_pos(&aq, q, &aq);
	}
	if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) {
		goto ret;
	}
	err = big_add_abs(result, &ap, &tmp);

ret:
	big_finish(&tmp);
ret2:
	big_finish(&aq);
ret1:
	big_finish(&ap);

	return (err);
}
BIG_ERR_CODE
big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
    BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
    BIGNUM *p_rr, BIGNUM *q_rr)
{
	return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1,
	    p, q, pinvmodq, p_rr, q_rr, NULL));
}
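/*
 * Caller's view of the CRT path (illustrative only): for RSA, the
 * private-key operation m = c^d mod (p*q) is driven by the
 * precomputed dp = d mod (p-1), dq = d mod (q-1) and
 * pinvmodq = p^(-1) mod q that big_modexp_crt() recombines above.
 * NULL p_rr/q_rr simply makes the R^2 values be recomputed.
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static BIG_ERR_CODE
example_rsa_private(BIGNUM *m, BIGNUM *c, BIGNUM *dp, BIGNUM *dq,
    BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq)
{
	return (big_modexp_crt(m, c, dp, dq, p, q, pinvmodq, NULL, NULL));
}
#endif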
static BIG_CHUNK_TYPE	onearr[1] = {(BIG_CHUNK_TYPE)1};
#if !defined(NO_BIG_ONE)
BIGNUM big_One = {1, 1, 1, 0, onearr};
#endif

static BIG_CHUNK_TYPE	twoarr[1] = {(BIG_CHUNK_TYPE)2};
#if !defined(NO_BIG_TWO)
BIGNUM big_Two = {1, 1, 1, 0, twoarr};
#endif

static BIG_CHUNK_TYPE	fourarr[1] = {(BIG_CHUNK_TYPE)4};
static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
BIG_ERR_CODE
big_sqrt_pos(BIGNUM *result, BIGNUM *n)
{
	BIGNUM		*high, *low, *mid, *t;
	BIGNUM		t1, t2, t3, prod;
	BIG_CHUNK_TYPE	t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t3value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	prodvalue[BIGTMPSIZE];
	int		i, diff;
	uint32_t	nbits, nrootbits, highbits;
	BIG_ERR_CODE	err;

	nbits = big_numbits(n);

	if ((err = big_init1(&t1, n->len + 1,
	    t1value, arraysize(t1value))) != BIG_OK)
		return (err);
	if ((err = big_init1(&t2, n->len + 1,
	    t2value, arraysize(t2value))) != BIG_OK)
		goto ret1;
	if ((err = big_init1(&t3, n->len + 1,
	    t3value, arraysize(t3value))) != BIG_OK)
		goto ret2;
	if ((err = big_init1(&prod, n->len + 1,
	    prodvalue, arraysize(prodvalue))) != BIG_OK)
		goto ret3;

	nrootbits = (nbits + 1) / 2;
	t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
	for (i = 0; i < t1.len; i++) {
		t1.value[i] = 0;
		t2.value[i] = BIG_CHUNK_ALLBITS;
	}
	highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1);
	if (highbits == BIG_CHUNK_SIZE) {
		t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT;
		t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS;
	} else {
		t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1);
		t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
	}

	high = &t2;
	low = &t1;
	mid = &t3;

	if ((err = big_mul(&prod, high, high)) != BIG_OK) {
		goto ret;
	}
	diff = big_cmp_abs(&prod, n);
	if (diff <= 0) {
		err = big_copy(result, high);
		goto ret;
	}

	(void) big_sub_pos(mid, high, low);
	while (big_cmp_abs(&big_One, mid) != 0) {
		(void) big_add_abs(mid, high, low);
		(void) big_half_pos(mid, mid);
		if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
			goto ret;
		diff = big_cmp_abs(&prod, n);
		if (diff > 0) {
			t = high;
			high = mid;
			mid = t;
		} else if (diff < 0) {
			t = low;
			low = mid;
			mid = t;
		} else {
			err = big_copy(result, mid);
			goto ret;
		}
		(void) big_sub_pos(mid, high, low);
	}

	err = big_copy(result, low);
ret:
	if (prod.malloced) big_finish(&prod);
ret3:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}
BIG_ERR_CODE
big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
{
	BIGNUM		*t, *tmp2, *m, *n;
	BIGNUM		t1, t2, t3;
	BIG_CHUNK_TYPE	t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t3value[BIGTMPSIZE];
	int		len;
	BIG_ERR_CODE	err;

	if (big_is_zero(nn) ||
	    (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
		*jac = 0;
		return (BIG_OK);
	}

	if (nn->len > mm->len) {
		len = nn->len;
	} else {
		len = mm->len;
	}

	if ((err = big_init1(&t1, len,
	    t1value, arraysize(t1value))) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&t2, len,
	    t2value, arraysize(t2value))) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&t3, len,
	    t3value, arraysize(t3value))) != BIG_OK) {
		goto ret2;
	}

	n = &t1;
	m = &t2;
	tmp2 = &t3;

	(void) big_copy(n, nn);
	(void) big_copy(m, mm);

	*jac = 1;
	while (big_cmp_abs(&big_One, m) != 0) {
		if (big_is_zero(n)) {
			*jac = 0;
			goto ret;
		}
		if ((m->value[0] & 1) == 0) {
			if (((n->value[0] & 7) == 3) ||
			    ((n->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(m, m);
		} else if ((n->value[0] & 1) == 0) {
			if (((m->value[0] & 7) == 3) ||
			    ((m->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(n, n);
		} else {
			if (((m->value[0] & 3) == 3) &&
			    ((n->value[0] & 3) == 3)) {
				*jac = -*jac;
			}
			if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
				goto ret;
			}
			t = tmp2;
			tmp2 = m;
			m = n;
			n = t;
		}
	}
	err = BIG_OK;

ret:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}
BIG_ERR_CODE
big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
{
	int		m, w, i;
	BIG_CHUNK_TYPE	bit;
	BIGNUM		ki, tmp, tmp2;
	BIG_CHUNK_TYPE	kivalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE	tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE	tmp2value[BIGTMPSIZE];
	BIG_ERR_CODE	err;

	if (big_cmp_abs(k, &big_One) == 0) {
		(void) big_copy(Lk, p);
		(void) big_copy(Lkminus1, &big_Two);
		return (BIG_OK);
	}

	if ((err = big_init1(&ki, k->len + 1,
	    kivalue, arraysize(kivalue))) != BIG_OK)
		return (err);

	if ((err = big_init1(&tmp, 2 * n->len + 1,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
		goto ret1;

	if ((err = big_init1(&tmp2, n->len,
	    tmp2value, arraysize(tmp2value))) != BIG_OK)
		goto ret2;

	m = big_numbits(k);
	ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
	w = (m - 1) / BIG_CHUNK_SIZE;
	bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
	for (i = 0; i < ki.len; i++) {
		ki.value[i] = 0;
	}
	ki.value[ki.len - 1] = bit;
	if (big_cmp_abs(k, &ki) != 0) {
		(void) big_double(&ki, &ki);
	}
	(void) big_sub_pos(&ki, &ki, k);

	(void) big_copy(Lk, p);
	(void) big_copy(Lkminus1, &big_Two);

	for (i = 0; i < m; i++) {
		if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
			goto ret;
		}
		(void) big_add_abs(&tmp, &tmp, n);
		(void) big_sub_pos(&tmp, &tmp, p);
		if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
			goto ret;
		}
		if ((ki.value[w] & bit) != 0) {
			if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lk, &tmp2);
		} else {
			if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lkminus1, &tmp2);
		}
		bit = bit >> 1;
		if (bit == 0) {
			bit = BIG_CHUNK_HIGHBIT;
			w--;
		}
	}

	err = BIG_OK;

ret:
	if (tmp2.malloced) big_finish(&tmp2);
ret2:
	if (tmp.malloced) big_finish(&tmp);
ret1:
	if (ki.malloced) big_finish(&ki);

	return (err);
}
BIG_ERR_CODE
big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
{
	BIGNUM		o, nminus1, tmp, Lkminus1, Lk;
	BIG_CHUNK_TYPE	ovalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE	nminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE	Lkminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	Lkvalue[BIGTMPSIZE];
	BIG_ERR_CODE	err;
	int		e, i, jac;

	if (big_cmp_abs(n, &big_One) == 0) {
		return (BIG_FALSE);
	}
	if (big_cmp_abs(n, &big_Two) == 0) {
		return (BIG_TRUE);
	}
	if ((n->value[0] & 1) == 0) {
		return (BIG_FALSE);
	}

	if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
	    BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&nminus1, n->len,
	    nminus1value, arraysize(nminus1value))) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&tmp, 2 * n->len,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
		goto ret2;
	}
	if ((err = big_init1(&Lkminus1, n->len,
	    Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
		goto ret3;
	}
	if ((err = big_init1(&Lk, n->len,
	    Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
		goto ret4;
	}

	(void) big_sub_pos(&o, n, &big_One);	/* cannot fail */
	(void) big_copy(&nminus1, &o);		/* cannot fail */
	e = 0;
	while ((o.value[0] & 1) == 0) {
		e++;
		(void) big_half_pos(&o, &o);	/* cannot fail */
	}
	if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
	    BIG_OK) {
		goto ret;
	}

	i = 0;
	while ((i < e) &&
	    (big_cmp_abs(&tmp, &big_One) != 0) &&
	    (big_cmp_abs(&tmp, &nminus1) != 0)) {
		if ((err =
		    big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
		    BIG_OK) {
			goto ret;
		}
		i++;
	}

	if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
	    ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) {
		err = BIG_FALSE;
		goto ret;
	}

	if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) {
		goto ret;
	}
	if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) {
		goto ret;
	}
	if (big_cmp_abs(&tmp, n) == 0) {
		err = BIG_FALSE;
		goto ret;
	}

	(void) big_copy(&o, &big_Two);
	do {
		(void) big_add_abs(&o, &o, &big_One);
		if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) {
			goto ret;
		}
		(void) big_sub_pos(&tmp, &tmp, &big_Four);
		if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) {
			goto ret;
		}
	} while (jac != -1);

	(void) big_add_abs(&tmp, n, &big_One);
	if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) {
		goto ret;
	}

	if ((big_cmp_abs(&Lkminus1, &o) == 0) &&
	    (big_cmp_abs(&Lk, &big_Two) == 0)) {
		err = BIG_TRUE;
	} else {
		err = BIG_FALSE;
	}

ret:
	if (Lk.malloced) big_finish(&Lk);
ret4:
	if (Lkminus1.malloced) big_finish(&Lkminus1);
ret3:
	if (tmp.malloced) big_finish(&tmp);
ret2:
	if (nminus1.malloced) big_finish(&nminus1);
ret1:
	if (o.malloced) big_finish(&o);

	return (err);
}

BIG_ERR_CODE
big_isprime_pos(BIGNUM *n)
{
	return (big_isprime_pos_ext(n, NULL));
}
#define	SIEVESIZE 1000

BIG_ERR_CODE
big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
{
	static const uint32_t smallprimes[] = {
	    3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
	    51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 };
	BIG_ERR_CODE	err;
	int		sieve[SIEVESIZE];
	int		i;
	uint32_t	off, p;

	if ((err = big_copy(result, n)) != BIG_OK) {
		return (err);
	}
	result->value[0] |= 1;
	/* CONSTCOND */
	while (1) {
		for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
		for (i = 0;
		    i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
			p = smallprimes[i];
			off = big_modhalf_pos(result, p);
			off = p - off;
			if ((off % 2) == 1) {
				off = off + p;
			}
			off = off / 2;
			while (off < SIEVESIZE) {
				sieve[off] = 1;
				off = off + p;
			}
		}

		for (i = 0; i < SIEVESIZE; i++) {
			if (sieve[i] == 0) {
				err = big_isprime_pos_ext(result, info);
				if (err != BIG_FALSE) {
					if (err != BIG_TRUE) {
						return (err);
					} else {
						goto out;
					}
				}
			}
			if ((err = big_add_abs(result, result, &big_Two)) !=
			    BIG_OK) {
				return (err);
			}
		}
	}

out:
	return (BIG_OK);
}

BIG_ERR_CODE
big_nextprime_pos(BIGNUM *result, BIGNUM *n)
{
	return (big_nextprime_pos_ext(result, n, NULL));
}
BIG_ERR_CODE
big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
{
	BIG_ERR_CODE	err;

	if ((err = big_copy(result, n)) != BIG_OK)
		return (err);
	result->value[0] |= 1;
	while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) {
		if (err != BIG_FALSE)
			return (err);
		if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK)
			return (err);
	}

	return (BIG_OK);
}
/*
 * given m and e, computes the rest in the equation
 * gcd(m, e) = cm * m + ce * e
 */
BIG_ERR_CODE
big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
{
	BIGNUM		*xi, *ri, *riminus1, *riminus2, *t;
	BIGNUM		*vmi, *vei, *vmiminus1, *veiminus1;
	BIGNUM		t1, t2, t3, t4, t5, t6, t7, t8, tmp;
	BIG_CHUNK_TYPE	t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t3value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t4value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t5value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t6value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t7value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t8value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	tmpvalue[BIGTMPSIZE];
	BIG_ERR_CODE	err;
	int		len;

	if (big_cmp_abs(m, e) >= 0) {
		len = m->len + 1;
	} else {
		len = e->len + 1;
	}

	if ((err = big_init1(&t1, len,
	    t1value, arraysize(t1value))) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&t2, len,
	    t2value, arraysize(t2value))) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&t3, len,
	    t3value, arraysize(t3value))) != BIG_OK) {
		goto ret2;
	}
	if ((err = big_init1(&t4, len,
	    t4value, arraysize(t4value))) != BIG_OK) {
		goto ret3;
	}
	if ((err = big_init1(&t5, len,
	    t5value, arraysize(t5value))) != BIG_OK) {
		goto ret4;
	}
	if ((err = big_init1(&t6, len,
	    t6value, arraysize(t6value))) != BIG_OK) {
		goto ret5;
	}
	if ((err = big_init1(&t7, len,
	    t7value, arraysize(t7value))) != BIG_OK) {
		goto ret6;
	}
	if ((err = big_init1(&t8, len,
	    t8value, arraysize(t8value))) != BIG_OK) {
		goto ret7;
	}

	if ((err = big_init1(&tmp, 2 * len,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
		goto ret8;
	}

	xi = &t1;
	ri = &t2;
	riminus1 = &t3;
	riminus2 = &t4;
	vmi = &t5;
	vei = &t6;
	vmiminus1 = &t7;
	veiminus1 = &t8;

	(void) big_copy(vmiminus1, &big_One);
	(void) big_copy(vmi, &big_One);
	(void) big_copy(veiminus1, &big_One);
	(void) big_copy(xi, &big_One);
	vei->len = 1;
	vei->value[0] = 0;
	vei->sign = 1;

	(void) big_copy(riminus1, m);
	(void) big_copy(ri, e);

	while (!big_is_zero(ri)) {
		t = riminus2;
		riminus2 = riminus1;
		riminus1 = ri;
		ri = t;

		if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) {
			goto ret;
		}
		t = vmiminus1;
		vmiminus1 = vmi;
		vmi = t;
		if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
			goto ret;
		}
		t = veiminus1;
		veiminus1 = vei;
		vei = t;
		if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
		    BIG_OK) {
			goto ret;
		}
	}
	if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
		goto ret;
	}
	if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
		goto ret;
	}
	if (ce != NULL) {
		err = big_copy(ce, vei);
	}
ret:
	if (tmp.malloced) big_finish(&tmp);
ret8:
	if (t8.malloced) big_finish(&t8);
ret7:
	if (t7.malloced) big_finish(&t7);
ret6:
	if (t6.malloced) big_finish(&t6);
ret5:
	if (t5.malloced) big_finish(&t5);
ret4:
	if (t4.malloced) big_finish(&t4);
ret3:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}
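/*
 * A common use of big_ext_gcd_pos() (illustrative only, assuming
 * gcd(e, m) == 1): computing e^(-1) mod m.  The coefficient ce from
 * gcd = cm*m + ce*e satisfies ce*e == 1 (mod m); it can come back
 * negative, so fold it into [0, m).
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static BIG_ERR_CODE
example_mod_inverse(BIGNUM *inv, BIGNUM *e, BIGNUM *m)
{
	BIGNUM		ce;
	BIG_ERR_CODE	err;

	if ((err = big_init(&ce, m->len + 1)) != BIG_OK)
		return (err);
	if ((err = big_ext_gcd_pos(NULL, NULL, &ce, m, e)) != BIG_OK)
		goto ret;
	if (ce.sign == -1) {
		(void) big_sub_pos(&ce, m, &ce);	/* ce = m - |ce| */
	}
	err = big_copy(inv, &ce);
ret:
	big_finish(&ce);
	return (err);
}
#endif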
/*
 * Get a rlen-bit random number in BIGNUM format.  Caller-supplied
 * (*rfunc)(void *dbuf, size_t dlen) must return 0 for success and
 * -1 for failure.  Note: (*rfunc)() takes length in bytes, not bits.
 */
BIG_ERR_CODE
big_random(BIGNUM *r, size_t rlen, int (*rfunc)(void *, size_t))
{
	size_t	rwords, rbytes;
	int	shift;

	if (r == NULL || rlen == 0 || rfunc == NULL)
		return (BIG_INVALID_ARGS);

	/*
	 * Convert rlen bits to r->len words (32- or 64-bit), rbytes bytes
	 * and extend r if it's not big enough to hold the random number.
	 */
	rwords = BITLEN2BIGNUMLEN(rlen);
	rbytes = rwords * sizeof (BIG_CHUNK_TYPE);
	if (big_extend(r, rwords) != BIG_OK)
		return (BIG_NO_MEM);
#ifdef BIGNUM_CHUNK_32
	r->len = rwords;
#else
	r->len = (uint32_t)rwords;
#endif

	if ((*rfunc)(r->value, rbytes) < 0)
		return (BIG_NO_RANDOM);

	r->value[rwords - 1] |= BIG_CHUNK_HIGHBIT;

	/*
	 * If the bit length is not a word boundary, shift the most
	 * significant word so that we have an exactly rlen-long number.
	 */
	if ((shift = rlen % BIG_CHUNK_SIZE) != 0)
		r->value[rwords - 1] >>= (BIG_CHUNK_SIZE - shift);

	r->sign = 1;	/* non-negative */

	return (BIG_OK);
}
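/*
 * Putting big_random() and big_nextprime_pos() together (illustrative
 * only): draw an rlen-bit random number, then advance to the next
 * probable prime.  rfunc supplies the entropy, with the contract
 * described above; prime must be an initialized BIGNUM.
 */
#ifdef BIGNUM_USAGE_EXAMPLE
static BIG_ERR_CODE
example_random_prime(BIGNUM *prime, size_t rlen,
    int (*rfunc)(void *, size_t))
{
	BIGNUM		r;
	BIG_ERR_CODE	err;

	if ((err = big_init(&r, BITLEN2BIGNUMLEN(rlen))) != BIG_OK)
		return (err);
	if ((err = big_random(&r, rlen, rfunc)) != BIG_OK)
		goto ret;
	err = big_nextprime_pos(prime, &r);	/* BIG_OK on success */
ret:
	big_finish(&r);
	return (err);
}
#endif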