2 * ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is the Multi-precision Binary Polynomial Arithmetic Library.
17 * The Initial Developer of the Original Code is
18 * Sun Microsystems, Inc.
19 * Portions created by the Initial Developer are Copyright (C) 2003
20 * the Initial Developer. All Rights Reserved.
23 * Sheueling Chang Shantz <sheueling.chang@sun.com> and
24 * Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
41 * Use is subject to license terms.
43 * Sun elects to use this software under the MPL license.
46 #pragma ident "%Z%%M% %I% %E% SMI"
49 #include "mp_gf2m-priv.h"
53 const mp_digit mp_gf2m_sqr_tb
[16] =
55 0, 1, 4, 5, 16, 17, 20, 21,
56 64, 65, 68, 69, 80, 81, 84, 85
59 /* Multiply two binary polynomials mp_digits a, b.
60 * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
61 * Output in two mp_digits rh, rl.
63 #if MP_DIGIT_BITS == 32
65 s_bmul_1x1(mp_digit
*rh
, mp_digit
*rl
, const mp_digit a
, const mp_digit b
)
67 register mp_digit h
, l
, s
;
68 mp_digit tab
[8], top2b
= a
>> 30;
69 register mp_digit a1
, a2
, a4
;
71 a1
= a
& (0x3FFFFFFF); a2
= a1
<< 1; a4
= a2
<< 1;
73 tab
[0] = 0; tab
[1] = a1
; tab
[2] = a2
; tab
[3] = a1
^a2
;
74 tab
[4] = a4
; tab
[5] = a1
^a4
; tab
[6] = a2
^a4
; tab
[7] = a1
^a2
^a4
;
76 s
= tab
[b
& 0x7]; l
= s
;
77 s
= tab
[b
>> 3 & 0x7]; l
^= s
<< 3; h
= s
>> 29;
78 s
= tab
[b
>> 6 & 0x7]; l
^= s
<< 6; h
^= s
>> 26;
79 s
= tab
[b
>> 9 & 0x7]; l
^= s
<< 9; h
^= s
>> 23;
80 s
= tab
[b
>> 12 & 0x7]; l
^= s
<< 12; h
^= s
>> 20;
81 s
= tab
[b
>> 15 & 0x7]; l
^= s
<< 15; h
^= s
>> 17;
82 s
= tab
[b
>> 18 & 0x7]; l
^= s
<< 18; h
^= s
>> 14;
83 s
= tab
[b
>> 21 & 0x7]; l
^= s
<< 21; h
^= s
>> 11;
84 s
= tab
[b
>> 24 & 0x7]; l
^= s
<< 24; h
^= s
>> 8;
85 s
= tab
[b
>> 27 & 0x7]; l
^= s
<< 27; h
^= s
>> 5;
86 s
= tab
[b
>> 30 ]; l
^= s
<< 30; h
^= s
>> 2;
88 /* compensate for the top two bits of a */
90 if (top2b
& 01) { l
^= b
<< 30; h
^= b
>> 2; }
91 if (top2b
& 02) { l
^= b
<< 31; h
^= b
>> 1; }
97 s_bmul_1x1(mp_digit
*rh
, mp_digit
*rl
, const mp_digit a
, const mp_digit b
)
99 register mp_digit h
, l
, s
;
100 mp_digit tab
[16], top3b
= a
>> 61;
101 register mp_digit a1
, a2
, a4
, a8
;
103 a1
= a
& (0x1FFFFFFFFFFFFFFFULL
); a2
= a1
<< 1;
104 a4
= a2
<< 1; a8
= a4
<< 1;
105 tab
[ 0] = 0; tab
[ 1] = a1
; tab
[ 2] = a2
; tab
[ 3] = a1
^a2
;
106 tab
[ 4] = a4
; tab
[ 5] = a1
^a4
; tab
[ 6] = a2
^a4
; tab
[ 7] = a1
^a2
^a4
;
107 tab
[ 8] = a8
; tab
[ 9] = a1
^a8
; tab
[10] = a2
^a8
; tab
[11] = a1
^a2
^a8
;
108 tab
[12] = a4
^a8
; tab
[13] = a1
^a4
^a8
; tab
[14] = a2
^a4
^a8
; tab
[15] = a1
^a2
^a4
^a8
;
110 s
= tab
[b
& 0xF]; l
= s
;
111 s
= tab
[b
>> 4 & 0xF]; l
^= s
<< 4; h
= s
>> 60;
112 s
= tab
[b
>> 8 & 0xF]; l
^= s
<< 8; h
^= s
>> 56;
113 s
= tab
[b
>> 12 & 0xF]; l
^= s
<< 12; h
^= s
>> 52;
114 s
= tab
[b
>> 16 & 0xF]; l
^= s
<< 16; h
^= s
>> 48;
115 s
= tab
[b
>> 20 & 0xF]; l
^= s
<< 20; h
^= s
>> 44;
116 s
= tab
[b
>> 24 & 0xF]; l
^= s
<< 24; h
^= s
>> 40;
117 s
= tab
[b
>> 28 & 0xF]; l
^= s
<< 28; h
^= s
>> 36;
118 s
= tab
[b
>> 32 & 0xF]; l
^= s
<< 32; h
^= s
>> 32;
119 s
= tab
[b
>> 36 & 0xF]; l
^= s
<< 36; h
^= s
>> 28;
120 s
= tab
[b
>> 40 & 0xF]; l
^= s
<< 40; h
^= s
>> 24;
121 s
= tab
[b
>> 44 & 0xF]; l
^= s
<< 44; h
^= s
>> 20;
122 s
= tab
[b
>> 48 & 0xF]; l
^= s
<< 48; h
^= s
>> 16;
123 s
= tab
[b
>> 52 & 0xF]; l
^= s
<< 52; h
^= s
>> 12;
124 s
= tab
[b
>> 56 & 0xF]; l
^= s
<< 56; h
^= s
>> 8;
125 s
= tab
[b
>> 60 ]; l
^= s
<< 60; h
^= s
>> 4;
127 /* compensate for the top three bits of a */
129 if (top3b
& 01) { l
^= b
<< 61; h
^= b
>> 3; }
130 if (top3b
& 02) { l
^= b
<< 62; h
^= b
>> 2; }
131 if (top3b
& 04) { l
^= b
<< 63; h
^= b
>> 1; }
137 /* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
138 * result is a binary polynomial in 4 mp_digits r[4].
139 * The caller MUST ensure that r has the right amount of space allocated.
142 s_bmul_2x2(mp_digit
*r
, const mp_digit a1
, const mp_digit a0
, const mp_digit b1
,
146 /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
147 s_bmul_1x1(r
+3, r
+2, a1
, b1
);
148 s_bmul_1x1(r
+1, r
, a0
, b0
);
149 s_bmul_1x1(&m1
, &m0
, a0
^ a1
, b0
^ b1
);
150 /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */
151 r
[2] ^= m1
^ r
[1] ^ r
[3]; /* h0 ^= m1 ^ l1 ^ h1; */
152 r
[1] = r
[3] ^ r
[2] ^ r
[0] ^ m1
^ m0
; /* l1 ^= l0 ^ h0 ^ m0; */
155 /* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
156 * result is a binary polynomial in 6 mp_digits r[6].
157 * The caller MUST ensure that r has the right amount of space allocated.
160 s_bmul_3x3(mp_digit
*r
, const mp_digit a2
, const mp_digit a1
, const mp_digit a0
,
161 const mp_digit b2
, const mp_digit b1
, const mp_digit b0
)
165 s_bmul_1x1(r
+5, r
+4, a2
, b2
); /* fill top 2 words */
166 s_bmul_2x2(zm
, a1
, a2
^a0
, b1
, b2
^b0
); /* fill middle 4 words */
167 s_bmul_2x2(r
, a1
, a0
, b1
, b0
); /* fill bottom 4 words */
171 zm
[1] ^= r
[1] ^ r
[5];
172 zm
[0] ^= r
[0] ^ r
[4];
180 /* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
181 * result is a binary polynomial in 8 mp_digits r[8].
182 * The caller MUST ensure that r has the right amount of space allocated.
184 void s_bmul_4x4(mp_digit
*r
, const mp_digit a3
, const mp_digit a2
, const mp_digit a1
,
185 const mp_digit a0
, const mp_digit b3
, const mp_digit b2
, const mp_digit b1
,
190 s_bmul_2x2(r
+4, a3
, a2
, b3
, b2
); /* fill top 4 words */
191 s_bmul_2x2(zm
, a3
^a1
, a2
^a0
, b3
^b1
, b2
^b0
); /* fill middle 4 words */
192 s_bmul_2x2(r
, a1
, a0
, b1
, b0
); /* fill bottom 4 words */
194 zm
[3] ^= r
[3] ^ r
[7];
195 zm
[2] ^= r
[2] ^ r
[6];
196 zm
[1] ^= r
[1] ^ r
[5];
197 zm
[0] ^= r
[0] ^ r
[4];
205 /* Compute addition of two binary polynomials a and b,
206 * store result in c; c could be a or b, a and b could be equal;
207 * c is the bitwise XOR of a and b.
210 mp_badd(const mp_int
*a
, const mp_int
*b
, mp_int
*c
)
212 mp_digit
*pa
, *pb
, *pc
;
214 mp_size used_pa
, used_pb
;
215 mp_err res
= MP_OKAY
;
217 /* Add all digits up to the precision of b. If b had more
218 * precision than a initially, swap a, b first
220 if (MP_USED(a
) >= MP_USED(b
)) {
223 used_pa
= MP_USED(a
);
224 used_pb
= MP_USED(b
);
228 used_pa
= MP_USED(b
);
229 used_pb
= MP_USED(a
);
232 /* Make sure c has enough precision for the output value */
233 MP_CHECKOK( s_mp_pad(c
, used_pa
) );
235 /* Do word-by-word xor */
237 for (ix
= 0; ix
< used_pb
; ix
++) {
238 (*pc
++) = (*pa
++) ^ (*pb
++);
241 /* Finish the rest of digits until we're actually done */
242 for (; ix
< used_pa
; ++ix
) {
246 MP_USED(c
) = used_pa
;
254 #define s_mp_div2(a) MP_CHECKOK( mpl_rsh((a), (a), 1) );
256 /* Compute binary polynomial multiply d = a * b */
258 s_bmul_d(const mp_digit
*a
, mp_size a_len
, mp_digit b
, mp_digit
*d
)
260 mp_digit a_i
, a0b0
, a1b1
, carry
= 0;
263 s_bmul_1x1(&a1b1
, &a0b0
, a_i
, b
);
270 /* Compute binary polynomial xor multiply accumulate d ^= a * b */
272 s_bmul_d_add(const mp_digit
*a
, mp_size a_len
, mp_digit b
, mp_digit
*d
)
274 mp_digit a_i
, a0b0
, a1b1
, carry
= 0;
277 s_bmul_1x1(&a1b1
, &a0b0
, a_i
, b
);
278 *d
++ ^= a0b0
^ carry
;
284 /* Compute binary polynomial xor multiply c = a * b.
285 * All parameters may be identical.
288 mp_bmul(const mp_int
*a
, const mp_int
*b
, mp_int
*c
)
292 mp_size ib
, a_used
, b_used
;
293 mp_err res
= MP_OKAY
;
297 ARGCHK(a
!= NULL
&& b
!= NULL
&& c
!= NULL
, MP_BADARG
);
300 MP_CHECKOK( mp_init_copy(&tmp
, a
) );
305 MP_CHECKOK( mp_init_copy(&tmp
, b
) );
309 if (MP_USED(a
) < MP_USED(b
)) {
310 const mp_int
*xch
= b
; /* switch a and b if b longer */
315 MP_USED(c
) = 1; MP_DIGIT(c
, 0) = 0;
316 MP_CHECKOK( s_mp_pad(c
, USED(a
) + USED(b
)) );
319 s_bmul_d(MP_DIGITS(a
), MP_USED(a
), *pb
++, MP_DIGITS(c
));
321 /* Outer loop: Digits of b */
324 MP_USED(c
) = a_used
+ b_used
;
325 for (ib
= 1; ib
< b_used
; ib
++) {
328 /* Inner product: Digits of a */
330 s_bmul_d_add(MP_DIGITS(a
), a_used
, b_i
, MP_DIGITS(c
) + ib
);
332 MP_DIGIT(c
, ib
+ a_used
) = b_i
;
345 /* Compute modular reduction of a and store result in r.
347 * For modular arithmetic, the irreducible polynomial f(t) is represented
348 * as an array of int[], where f(t) is of the form:
349 * f(t) = t^p[0] + t^p[1] + ... + t^p[k]
350 * where m = p[0] > p[1] > ... > p[k] = 0.
353 mp_bmod(const mp_int
*a
, const unsigned int p
[], mp_int
*r
)
357 mp_digit zz
, *z
, tmp
;
359 mp_err res
= MP_OKAY
;
361 /* The algorithm does the reduction in place in r,
362 * if a != r, copy a into r first so reduction can be done in r
365 MP_CHECKOK( mp_copy(a
, r
) );
369 /* start reduction */
370 dN
= p
[0] / MP_DIGIT_BITS
;
373 for (j
= used
- 1; j
> dN
;) {
381 for (k
= 1; p
[k
] > 0; k
++) {
382 /* reducing component t^p[k] */
384 d0
= n
% MP_DIGIT_BITS
;
385 d1
= MP_DIGIT_BITS
- d0
;
389 z
[j
-n
-1] ^= (zz
<<d1
);
392 /* reducing component t^0 */
394 d0
= p
[0] % MP_DIGIT_BITS
;
395 d1
= MP_DIGIT_BITS
- d0
;
396 z
[j
-n
] ^= (zz
>> d0
);
398 z
[j
-n
-1] ^= (zz
<< d1
);
402 /* final round of reduction */
405 d0
= p
[0] % MP_DIGIT_BITS
;
408 d1
= MP_DIGIT_BITS
- d0
;
410 /* clear up the top d1 bits */
411 if (d0
) z
[dN
] = (z
[dN
] << d1
) >> d1
;
412 *z
^= zz
; /* reduction t^0 component */
414 for (k
= 1; p
[k
] > 0; k
++) {
415 /* reducing component t^p[k]*/
416 n
= p
[k
] / MP_DIGIT_BITS
;
417 d0
= p
[k
] % MP_DIGIT_BITS
;
418 d1
= MP_DIGIT_BITS
- d0
;
431 /* Compute the product of two polynomials a and b, reduce modulo p,
432 * Store the result in r. r could be a or b; a could be b.
435 mp_bmulmod(const mp_int
*a
, const mp_int
*b
, const unsigned int p
[], mp_int
*r
)
439 if (a
== b
) return mp_bsqrmod(a
, p
, r
);
440 if ((res
= mp_bmul(a
, b
, r
) ) != MP_OKAY
)
442 return mp_bmod(r
, p
, r
);
445 /* Compute binary polynomial squaring c = a*a mod p .
446 * Parameter r and a can be identical.
450 mp_bsqrmod(const mp_int
*a
, const unsigned int p
[], mp_int
*r
)
452 mp_digit
*pa
, *pr
, a_i
;
457 ARGCHK(a
!= NULL
&& r
!= NULL
, MP_BADARG
);
461 MP_CHECKOK( mp_init_copy(&tmp
, a
) );
465 MP_USED(r
) = 1; MP_DIGIT(r
, 0) = 0;
466 MP_CHECKOK( s_mp_pad(r
, 2*USED(a
)) );
471 MP_USED(r
) = 2 * a_used
;
473 for (ia
= 0; ia
< a_used
; ia
++) {
475 *pr
++ = gf2m_SQR0(a_i
);
476 *pr
++ = gf2m_SQR1(a_i
);
479 MP_CHECKOK( mp_bmod(r
, p
, r
) );
488 /* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p.
489 * Store the result in r. r could be x or y, and x could equal y.
490 * Uses algorithm Modular_Division_GF(2^m) from
491 * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to
495 mp_bdivmod(const mp_int
*y
, const mp_int
*x
, const mp_int
*pp
,
496 const unsigned int p
[], mp_int
*r
)
499 mp_int
*a
, *b
, *u
, *v
;
500 mp_err res
= MP_OKAY
;
506 MP_CHECKOK( mp_init_copy(&aa
, x
) );
507 MP_CHECKOK( mp_init_copy(&uu
, y
) );
508 MP_CHECKOK( mp_init_copy(&bb
, pp
) );
509 MP_CHECKOK( s_mp_pad(r
, USED(pp
)) );
510 MP_USED(r
) = 1; MP_DIGIT(r
, 0) = 0;
512 a
= &aa
; b
= &bb
; u
=&uu
; v
=r
;
513 /* reduce x and y mod p */
514 MP_CHECKOK( mp_bmod(a
, p
, a
) );
515 MP_CHECKOK( mp_bmod(u
, p
, u
) );
517 while (!mp_isodd(a
)) {
520 MP_CHECKOK( mp_badd(u
, pp
, u
) );
526 if (mp_cmp_mag(b
, a
) > 0) {
527 MP_CHECKOK( mp_badd(b
, a
, b
) );
528 MP_CHECKOK( mp_badd(v
, u
, v
) );
532 MP_CHECKOK( mp_badd(v
, pp
, v
) );
535 } while (!mp_isodd(b
));
537 else if ((MP_DIGIT(a
,0) == 1) && (MP_USED(a
) == 1))
540 MP_CHECKOK( mp_badd(a
, b
, a
) );
541 MP_CHECKOK( mp_badd(u
, v
, u
) );
545 MP_CHECKOK( mp_badd(u
, pp
, u
) );
548 } while (!mp_isodd(a
));
552 MP_CHECKOK( mp_copy(u
, r
) );
555 /* XXX this appears to be a memory leak in the NSS code */
563 /* Convert the bit-string representation of a polynomial a into an array
564 * of integers corresponding to the bits with non-zero coefficient.
565 * Up to max elements of the array will be filled. Return value is total
566 * number of coefficients that would be extracted if array was large enough.
569 mp_bpoly2arr(const mp_int
*a
, unsigned int p
[], int max
)
572 mp_digit top_bit
, mask
;
575 top_bit
<<= MP_DIGIT_BIT
- 1;
577 for (k
= 0; k
< max
; k
++) p
[k
] = 0;
580 for (i
= MP_USED(a
) - 1; i
>= 0; i
--) {
582 for (j
= MP_DIGIT_BIT
- 1; j
>= 0; j
--) {
583 if (MP_DIGITS(a
)[i
] & mask
) {
584 if (k
< max
) p
[k
] = MP_DIGIT_BIT
* i
+ j
;
594 /* Convert the coefficient array representation of a polynomial to a
595 * bit-string. The array must be terminated by 0.
598 mp_barr2poly(const unsigned int p
[], mp_int
*a
)
601 mp_err res
= MP_OKAY
;
605 for (i
= 0; p
[i
] > 0; i
++) {
606 MP_CHECKOK( mpl_set_bit(a
, p
[i
], 1) );
608 MP_CHECKOK( mpl_set_bit(a
, 0, 1) );