// FIXME: thread safety has not been verified yet; remove this line after checking.
/* crypto/bn/bn_asm.c */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
 * All rights reserved.
 *
 * This package is an SSL implementation written
 * by Eric Young (eay@cryptsoft.com).
 * The implementation was written so as to conform with Netscapes SSL.
 *
 * This library is free for commercial and non-commercial use as long as
 * the following conditions are aheared to.  The following conditions
 * apply to all code found in this distribution, be it the RC4, RSA,
 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
 * included with this distribution is covered by the same copyright terms
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
 *
 * Copyright remains Eric Young's, and as such any Copyright notices in
 * the code are not to be removed.
 * If this package is used in a product, Eric Young should be given attribution
 * as the author of the parts of the library used.
 * This can be in the form of a textual message at program startup or
 * in documentation (online or textual) provided with the package.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    "This product includes cryptographic software written by
 *     Eric Young (eay@cryptsoft.com)"
 *    The word 'cryptographic' can be left out if the rouines from the library
 *    being used are not cryptographic related :-).
 * 4. If you include any Windows specific code (or a derivative thereof) from
 *    the apps directory (application code) you must include an acknowledgement:
 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
 *
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * The license and distribution terms for any publically available version or
 * derivative of this code cannot be changed.  i.e. this code cannot simply be
 * copied and put under another distribution license
 * [including the GNU Public License.]
 */

#undef NDEBUG /* avoid conflicting definitions */

#include <assert.h> /* assert() is used by bn_div_words() below */
#include "openssl_mods.h"

#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)

BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG c1 = 0;

    if(num <= 0) { return (c1); }

    /* Unrolled: handle four words per pass while at least four remain. */
    while(num & ~3) {
        mul_add(rp[0], ap[0], w, c1);
        mul_add(rp[1], ap[1], w, c1);
        mul_add(rp[2], ap[2], w, c1);
        mul_add(rp[3], ap[3], w, c1);
        ap += 4;
        rp += 4;
        num -= 4;
    }
    /* At most three words are left. */
    if(num) {
        mul_add(rp[0], ap[0], w, c1);
        if(--num == 0) { return c1; }
        mul_add(rp[1], ap[1], w, c1);
        if(--num == 0) { return c1; }
        mul_add(rp[2], ap[2], w, c1);
    }
    return (c1);
}

BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG c1 = 0;

    if(num <= 0) { return (c1); }

    /* Unrolled: handle four words per pass while at least four remain. */
    while(num & ~3) {
        mul(rp[0], ap[0], w, c1);
        mul(rp[1], ap[1], w, c1);
        mul(rp[2], ap[2], w, c1);
        mul(rp[3], ap[3], w, c1);
        ap += 4;
        rp += 4;
        num -= 4;
    }
    /* At most three words are left. */
    if(num) {
        mul(rp[0], ap[0], w, c1);
        if(--num == 0) { return c1; }
        mul(rp[1], ap[1], w, c1);
        if(--num == 0) { return c1; }
        mul(rp[2], ap[2], w, c1);
    }
    return (c1);
}

void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
{
    if(n <= 0) { return; }

    /* Each input word produces two result words. */
    while(n & ~3) {
        sqr(r[0], r[1], a[0]);
        sqr(r[2], r[3], a[1]);
        sqr(r[4], r[5], a[2]);
        sqr(r[6], r[7], a[3]);
        a += 4;
        r += 8;
        n -= 4;
    }
    if(n) {
        sqr(r[0], r[1], a[0]);
        if(--n == 0) { return; }
        sqr(r[2], r[3], a[1]);
        if(--n == 0) { return; }
        sqr(r[4], r[5], a[2]);
    }
}

#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */

BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG c = 0;
    BN_ULONG bl, bh;

    if(num <= 0) { return ((BN_ULONG)0); }

    /* Split the multiplier into low and high halves once, up front. */
    bl = LBITS(w);
    bh = HBITS(w);

    for(;;) {
        mul_add(rp[0], ap[0], bl, bh, c);
        if(--num == 0) { break; }
        mul_add(rp[1], ap[1], bl, bh, c);
        if(--num == 0) { break; }
        mul_add(rp[2], ap[2], bl, bh, c);
        if(--num == 0) { break; }
        mul_add(rp[3], ap[3], bl, bh, c);
        if(--num == 0) { break; }
        ap += 4;
        rp += 4;
    }
    return (c);
}

BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;
    BN_ULONG bl, bh;

    if(num <= 0) { return ((BN_ULONG)0); }

    /* Split the multiplier into low and high halves once, up front. */
    bl = LBITS(w);
    bh = HBITS(w);

    for(;;) {
        mul(rp[0], ap[0], bl, bh, carry);
        if(--num == 0) { break; }
        mul(rp[1], ap[1], bl, bh, carry);
        if(--num == 0) { break; }
        mul(rp[2], ap[2], bl, bh, carry);
        if(--num == 0) { break; }
        mul(rp[3], ap[3], bl, bh, carry);
        if(--num == 0) { break; }
        ap += 4;
        rp += 4;
    }
    return (carry);
}

void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
{
    if(n <= 0) { return; }

    /* Each input word produces two result words. */
    for(;;) {
        sqr64(r[0], r[1], a[0]);
        if(--n == 0) { break; }
        sqr64(r[2], r[3], a[1]);
        if(--n == 0) { break; }
        sqr64(r[4], r[5], a[2]);
        if(--n == 0) { break; }
        sqr64(r[6], r[7], a[3]);
        if(--n == 0) { break; }
        a += 4;
        r += 8;
    }
}

#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */

#if defined(BN_LLONG) && defined(BN_DIV2W)

BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    return ((BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2) | l) / (BN_ULLONG)d));
}

#else

/* Divide h,l by d and return the result. */
/* I need to test this some more :-( */
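/*
 * Usage sketch (illustrative, not part of the original sources): bn_div_words()
 * returns the one-word quotient of the two-word value (h:l) divided by d.  The
 * result is only meaningful when h < d, so that the quotient fits in BN_BITS2
 * bits.  Assuming a build where BN_BITS2 == 32 (an assumption made only for
 * the concrete numbers below):
 *
 *     BN_ULONG q = bn_div_words(0x1, 0x0, 0x80000000);
 *     // (h:l) == 2^32 and d == 2^31, so q == 2
 */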
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    BN_ULONG dh, dl, q, ret = 0, th, tl, t;
    int i, count = 2;

    if(d == 0) { return (BN_MASK2); }

    i = BN_num_bits_word(d);
    assert((i == BN_BITS2) || (h > (BN_ULONG)1 << i));

    i = BN_BITS2 - i;
    if(h >= d) { h -= d; }

    /* Normalise so that the divisor's top bit is set. */
    if(i) {
        d <<= i;
        h = (h << i) | (l >> (BN_BITS2 - i));
        l <<= i;
    }
    dh = (d & BN_MASK2h) >> BN_BITS4;
    dl = (d & BN_MASK2l);
    for(;;) {
        /* Estimate one half-word digit of the quotient. */
        if((h >> BN_BITS4) == dh) {
            q = BN_MASK2l;
        } else {
            q = h / dh;
        }

        th = q * dh;
        tl = dl * q;
        /* Correct the estimate downwards until it no longer overshoots. */
        for(;;) {
            t = h - th;
            if((t & BN_MASK2h) ||
               ((tl) <= ((t << BN_BITS4) |
                         ((l & BN_MASK2h) >> BN_BITS4)))) {
                break;
            }
            q--;
            th -= dh;
            tl -= dl;
        }
        t = (tl >> BN_BITS4);
        tl = (tl << BN_BITS4) & BN_MASK2h;
        th += t;

        if(l < tl) { th++; }
        l -= tl;
        if(h < th) {
            h += d;
            q--;
        }
        h -= th;

        if(--count == 0) { break; }

        ret = q << BN_BITS4;
        h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
        l = (l & BN_MASK2l) << BN_BITS4;
    }
    ret |= q;
    return (ret);
}
#endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */

#ifdef BN_LLONG
BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    BN_ULLONG ll = 0;

    if(n <= 0) { return ((BN_ULONG)0); }

    for(;;) {
        ll += (BN_ULLONG)a[0] + b[0];
        r[0] = (BN_ULONG)ll & BN_MASK2;
        ll >>= BN_BITS2;              /* keep only the carry */
        if(--n <= 0) { break; }

        ll += (BN_ULLONG)a[1] + b[1];
        r[1] = (BN_ULONG)ll & BN_MASK2;
        ll >>= BN_BITS2;
        if(--n <= 0) { break; }

        ll += (BN_ULLONG)a[2] + b[2];
        r[2] = (BN_ULONG)ll & BN_MASK2;
        ll >>= BN_BITS2;
        if(--n <= 0) { break; }

        ll += (BN_ULLONG)a[3] + b[3];
        r[3] = (BN_ULONG)ll & BN_MASK2;
        ll >>= BN_BITS2;
        if(--n <= 0) { break; }

        a += 4;
        b += 4;
        r += 4;
    }
    return ((BN_ULONG)ll);
}

#else /* !BN_LLONG */

BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    BN_ULONG c = 0, l, t;

    if(n <= 0) { return ((BN_ULONG)0); }

    for(;;) {
        /* Add the incoming carry, then the corresponding word of b. */
        t = a[0]; t = (t + c) & BN_MASK2; c = (t < c);
        l = (t + b[0]) & BN_MASK2; c += (l < t); r[0] = l;
        if(--n <= 0) { break; }

        t = a[1]; t = (t + c) & BN_MASK2; c = (t < c);
        l = (t + b[1]) & BN_MASK2; c += (l < t); r[1] = l;
        if(--n <= 0) { break; }

        t = a[2]; t = (t + c) & BN_MASK2; c = (t < c);
        l = (t + b[2]) & BN_MASK2; c += (l < t); r[2] = l;
        if(--n <= 0) { break; }

        t = a[3]; t = (t + c) & BN_MASK2; c = (t < c);
        l = (t + b[3]) & BN_MASK2; c += (l < t); r[3] = l;
        if(--n <= 0) { break; }

        a += 4;
        b += 4;
        r += 4;
    }
    return ((BN_ULONG)c);
}

#endif /* !BN_LLONG */

BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    BN_ULONG t1, t2;
    int c = 0;

    if(n <= 0) { return ((BN_ULONG)0); }

    for(;;) {
        t1 = a[0]; t2 = b[0];
        r[0] = (t1 - t2 - c) & BN_MASK2;
        if(t1 != t2) { c = (t1 < t2); }
        if(--n <= 0) { break; }

        t1 = a[1]; t2 = b[1];
        r[1] = (t1 - t2 - c) & BN_MASK2;
        if(t1 != t2) { c = (t1 < t2); }
        if(--n <= 0) { break; }

        t1 = a[2]; t2 = b[2];
        r[2] = (t1 - t2 - c) & BN_MASK2;
        if(t1 != t2) { c = (t1 < t2); }
        if(--n <= 0) { break; }

        t1 = a[3]; t2 = b[3];
        r[3] = (t1 - t2 - c) & BN_MASK2;
        if(t1 != t2) { c = (t1 < t2); }
        if(--n <= 0) { break; }

        a += 4;
        b += 4;
        r += 4;
    }
    return ((BN_ULONG)c);
}

#ifdef BN_MUL_COMBA

/* mul_add_c(a,b,c0,c1,c2)    -- c+=a*b         for the three-word number c=(c2,c1,c0) */
/* mul_add_c2(a,b,c0,c1,c2)   -- c+=2*a*b       for the three-word number c=(c2,c1,c0) */
/* sqr_add_c(a,i,c0,c1,c2)    -- c+=a[i]^2      for the three-word number c=(c2,c1,c0) */
/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for the three-word number c=(c2,c1,c0) */
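/*
 * Illustrative sketch only (not used by this file and excluded from the build):
 * what mul_add_c() accomplishes, spelled out as a plain function.  The helper
 * name, the 32-bit unsigned int word and the 64-bit unsigned long long
 * intermediate are assumptions made purely for the sketch; the real macros
 * below operate on the ambient t/t1/t2 temporaries and the configured
 * BN_ULONG width.
 */
#if 0
static void mul_add_c_sketch(unsigned int a, unsigned int b,
                             unsigned int *c0, unsigned int *c1, unsigned int *c2)
{
    unsigned long long t = (unsigned long long)a * b; /* full double-word product */
    unsigned int lo = (unsigned int)t;                /* low word of the product */
    unsigned int hi = (unsigned int)(t >> 32);        /* high word of the product */

    *c0 += lo;
    if(*c0 < lo) { hi++; }      /* carry out of the low accumulator word */
    *c1 += hi;
    if(*c1 < hi) { (*c2)++; }   /* carry out of the middle accumulator word */
}
#endif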
#ifdef BN_LLONG
#define mul_add_c(a,b,c0,c1,c2) \
    t=(BN_ULLONG)a*b; \
    t1=(BN_ULONG)Lw(t); \
    t2=(BN_ULONG)Hw(t); \
    c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define mul_add_c2(a,b,c0,c1,c2) \
    t=(BN_ULLONG)a*b; \
    tt=(t+t)&BN_MASK; \
    if (tt < t) c2++; \
    t1=(BN_ULONG)Lw(tt); \
    t2=(BN_ULONG)Hw(tt); \
    c0=(c0+t1)&BN_MASK2; \
    if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c(a,i,c0,c1,c2) \
    t=(BN_ULLONG)a[i]*a[i]; \
    t1=(BN_ULONG)Lw(t); \
    t2=(BN_ULONG)Hw(t); \
    c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i],(a)[j],c0,c1,c2)

#elif defined(BN_UMULT_HIGH)

#define mul_add_c(a,b,c0,c1,c2) { \
    BN_ULONG ta=(a),tb=(b); \
    t1 = ta * tb; \
    t2 = BN_UMULT_HIGH(ta,tb); \
    c0 += t1; t2 += (c0<t1)?1:0; \
    c1 += t2; c2 += (c1<t2)?1:0; \
    }

#define mul_add_c2(a,b,c0,c1,c2) { \
    BN_ULONG ta=(a),tb=(b),t0; \
    t1 = BN_UMULT_HIGH(ta,tb); \
    t0 = ta * tb; \
    t2 = t1+t1; c2 += (t2<t1)?1:0; \
    t1 = t0+t0; t2 += (t1<t0)?1:0; \
    c0 += t1; t2 += (c0<t1)?1:0; \
    c1 += t2; c2 += (c1<t2)?1:0; \
    }

#define sqr_add_c(a,i,c0,c1,c2) { \
    BN_ULONG ta=(a)[i]; \
    t1 = ta * ta; \
    t2 = BN_UMULT_HIGH(ta,ta); \
    c0 += t1; t2 += (c0<t1)?1:0; \
    c1 += t2; c2 += (c1<t2)?1:0; \
    }

#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i],(a)[j],c0,c1,c2)

#else /* !BN_LLONG */

#define mul_add_c(a,b,c0,c1,c2) \
    t1=LBITS(a); t2=HBITS(a); \
    bl=LBITS(b); bh=HBITS(b); \
    mul64(t1,t2,bl,bh); \
    c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define mul_add_c2(a,b,c0,c1,c2) \
    t1=LBITS(a); t2=HBITS(a); \
    bl=LBITS(b); bh=HBITS(b); \
    mul64(t1,t2,bl,bh); \
    if (t2 & BN_TBIT) c2++; \
    t2=(t2+t2)&BN_MASK2; \
    if (t1 & BN_TBIT) t2++; \
    t1=(t1+t1)&BN_MASK2; \
    c0=(c0+t1)&BN_MASK2; \
    if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c(a,i,c0,c1,c2) \
    sqr64(t1,t2,(a)[i]); \
    c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i],(a)[j],c0,c1,c2)
#endif /* !BN_LLONG */

void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
#ifdef BN_LLONG
    BN_ULLONG t;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    /* Compute r = a*b column by column; c1,c2,c3 rotate through the roles of
     * low/middle/high accumulator word for each result column. */
    c1 = 0;
    c2 = 0;
    c3 = 0;
    mul_add_c(a[0], b[0], c1, c2, c3);
    r[0] = c1; c1 = 0;
    mul_add_c(a[0], b[1], c2, c3, c1);
    mul_add_c(a[1], b[0], c2, c3, c1);
    r[1] = c2; c2 = 0;
    mul_add_c(a[2], b[0], c3, c1, c2);
    mul_add_c(a[1], b[1], c3, c1, c2);
    mul_add_c(a[0], b[2], c3, c1, c2);
    r[2] = c3; c3 = 0;
    mul_add_c(a[0], b[3], c1, c2, c3);
    mul_add_c(a[1], b[2], c1, c2, c3);
    mul_add_c(a[2], b[1], c1, c2, c3);
    mul_add_c(a[3], b[0], c1, c2, c3);
    r[3] = c1; c1 = 0;
    mul_add_c(a[4], b[0], c2, c3, c1);
    mul_add_c(a[3], b[1], c2, c3, c1);
    mul_add_c(a[2], b[2], c2, c3, c1);
    mul_add_c(a[1], b[3], c2, c3, c1);
    mul_add_c(a[0], b[4], c2, c3, c1);
    r[4] = c2; c2 = 0;
    mul_add_c(a[0], b[5], c3, c1, c2);
    mul_add_c(a[1], b[4], c3, c1, c2);
    mul_add_c(a[2], b[3], c3, c1, c2);
    mul_add_c(a[3], b[2], c3, c1, c2);
    mul_add_c(a[4], b[1], c3, c1, c2);
    mul_add_c(a[5], b[0], c3, c1, c2);
    r[5] = c3; c3 = 0;
    mul_add_c(a[6], b[0], c1, c2, c3);
    mul_add_c(a[5], b[1], c1, c2, c3);
    mul_add_c(a[4], b[2], c1, c2, c3);
    mul_add_c(a[3], b[3], c1, c2, c3);
    mul_add_c(a[2], b[4], c1, c2, c3);
    mul_add_c(a[1], b[5], c1, c2, c3);
    mul_add_c(a[0], b[6], c1, c2, c3);
    r[6] = c1; c1 = 0;
    mul_add_c(a[0], b[7], c2, c3, c1);
    mul_add_c(a[1], b[6], c2, c3, c1);
    mul_add_c(a[2], b[5], c2, c3, c1);
    mul_add_c(a[3], b[4], c2, c3, c1);
    mul_add_c(a[4], b[3], c2, c3, c1);
    mul_add_c(a[5], b[2], c2, c3, c1);
    mul_add_c(a[6], b[1], c2, c3, c1);
    mul_add_c(a[7], b[0], c2, c3, c1);
    r[7] = c2; c2 = 0;
    mul_add_c(a[7], b[1], c3, c1, c2);
    mul_add_c(a[6], b[2], c3, c1, c2);
    mul_add_c(a[5], b[3], c3, c1, c2);
    mul_add_c(a[4], b[4], c3, c1, c2);
    mul_add_c(a[3], b[5], c3, c1, c2);
    mul_add_c(a[2], b[6], c3, c1, c2);
    mul_add_c(a[1], b[7], c3, c1, c2);
    r[8] = c3; c3 = 0;
    mul_add_c(a[2], b[7], c1, c2, c3);
    mul_add_c(a[3], b[6], c1, c2, c3);
    mul_add_c(a[4], b[5], c1, c2, c3);
    mul_add_c(a[5], b[4], c1, c2, c3);
    mul_add_c(a[6], b[3], c1, c2, c3);
    mul_add_c(a[7], b[2], c1, c2, c3);
    r[9] = c1; c1 = 0;
    mul_add_c(a[7], b[3], c2, c3, c1);
    mul_add_c(a[6], b[4], c2, c3, c1);
    mul_add_c(a[5], b[5], c2, c3, c1);
    mul_add_c(a[4], b[6], c2, c3, c1);
    mul_add_c(a[3], b[7], c2, c3, c1);
    r[10] = c2; c2 = 0;
    mul_add_c(a[4], b[7], c3, c1, c2);
    mul_add_c(a[5], b[6], c3, c1, c2);
    mul_add_c(a[6], b[5], c3, c1, c2);
    mul_add_c(a[7], b[4], c3, c1, c2);
    r[11] = c3; c3 = 0;
    mul_add_c(a[7], b[5], c1, c2, c3);
    mul_add_c(a[6], b[6], c1, c2, c3);
    mul_add_c(a[5], b[7], c1, c2, c3);
    r[12] = c1; c1 = 0;
    mul_add_c(a[6], b[7], c2, c3, c1);
    mul_add_c(a[7], b[6], c2, c3, c1);
    r[13] = c2; c2 = 0;
    mul_add_c(a[7], b[7], c3, c1, c2);
    r[14] = c3;
    r[15] = c1;
}

void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
#ifdef BN_LLONG
    BN_ULLONG t;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    c1 = 0;
    c2 = 0;
    c3 = 0;
    mul_add_c(a[0], b[0], c1, c2, c3);
    r[0] = c1; c1 = 0;
    mul_add_c(a[0], b[1], c2, c3, c1);
    mul_add_c(a[1], b[0], c2, c3, c1);
    r[1] = c2; c2 = 0;
    mul_add_c(a[2], b[0], c3, c1, c2);
    mul_add_c(a[1], b[1], c3, c1, c2);
    mul_add_c(a[0], b[2], c3, c1, c2);
    r[2] = c3; c3 = 0;
    mul_add_c(a[0], b[3], c1, c2, c3);
    mul_add_c(a[1], b[2], c1, c2, c3);
    mul_add_c(a[2], b[1], c1, c2, c3);
    mul_add_c(a[3], b[0], c1, c2, c3);
    r[3] = c1; c1 = 0;
    mul_add_c(a[3], b[1], c2, c3, c1);
    mul_add_c(a[2], b[2], c2, c3, c1);
    mul_add_c(a[1], b[3], c2, c3, c1);
    r[4] = c2; c2 = 0;
    mul_add_c(a[2], b[3], c3, c1, c2);
    mul_add_c(a[3], b[2], c3, c1, c2);
    r[5] = c3; c3 = 0;
    mul_add_c(a[3], b[3], c1, c2, c3);
    r[6] = c1;
    r[7] = c2;
}

void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
{
#ifdef BN_LLONG
    BN_ULLONG t, tt;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    /* Squaring: off-diagonal products are counted twice via sqr_add_c2. */
    c1 = 0;
    c2 = 0;
    c3 = 0;
    sqr_add_c(a, 0, c1, c2, c3);
    r[0] = c1; c1 = 0;
    sqr_add_c2(a, 1, 0, c2, c3, c1);
    r[1] = c2; c2 = 0;
    sqr_add_c(a, 1, c3, c1, c2);
    sqr_add_c2(a, 2, 0, c3, c1, c2);
    r[2] = c3; c3 = 0;
    sqr_add_c2(a, 3, 0, c1, c2, c3);
    sqr_add_c2(a, 2, 1, c1, c2, c3);
    r[3] = c1; c1 = 0;
    sqr_add_c(a, 2, c2, c3, c1);
    sqr_add_c2(a, 3, 1, c2, c3, c1);
    sqr_add_c2(a, 4, 0, c2, c3, c1);
    r[4] = c2; c2 = 0;
    sqr_add_c2(a, 5, 0, c3, c1, c2);
    sqr_add_c2(a, 4, 1, c3, c1, c2);
    sqr_add_c2(a, 3, 2, c3, c1, c2);
    r[5] = c3; c3 = 0;
    sqr_add_c(a, 3, c1, c2, c3);
    sqr_add_c2(a, 4, 2, c1, c2, c3);
    sqr_add_c2(a, 5, 1, c1, c2, c3);
    sqr_add_c2(a, 6, 0, c1, c2, c3);
    r[6] = c1; c1 = 0;
    sqr_add_c2(a, 7, 0, c2, c3, c1);
    sqr_add_c2(a, 6, 1, c2, c3, c1);
    sqr_add_c2(a, 5, 2, c2, c3, c1);
    sqr_add_c2(a, 4, 3, c2, c3, c1);
    r[7] = c2; c2 = 0;
    sqr_add_c(a, 4, c3, c1, c2);
    sqr_add_c2(a, 5, 3, c3, c1, c2);
    sqr_add_c2(a, 6, 2, c3, c1, c2);
    sqr_add_c2(a, 7, 1, c3, c1, c2);
    r[8] = c3; c3 = 0;
    sqr_add_c2(a, 7, 2, c1, c2, c3);
    sqr_add_c2(a, 6, 3, c1, c2, c3);
    sqr_add_c2(a, 5, 4, c1, c2, c3);
    r[9] = c1; c1 = 0;
    sqr_add_c(a, 5, c2, c3, c1);
    sqr_add_c2(a, 6, 4, c2, c3, c1);
    sqr_add_c2(a, 7, 3, c2, c3, c1);
    r[10] = c2; c2 = 0;
    sqr_add_c2(a, 7, 4, c3, c1, c2);
    sqr_add_c2(a, 6, 5, c3, c1, c2);
    r[11] = c3; c3 = 0;
    sqr_add_c(a, 6, c1, c2, c3);
    sqr_add_c2(a, 7, 5, c1, c2, c3);
    r[12] = c1; c1 = 0;
    sqr_add_c2(a, 7, 6, c2, c3, c1);
    r[13] = c2; c2 = 0;
    sqr_add_c(a, 7, c3, c1, c2);
    r[14] = c3;
    r[15] = c1;
}

void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
{
#ifdef BN_LLONG
    BN_ULLONG t, tt;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    c1 = 0;
    c2 = 0;
    c3 = 0;
    sqr_add_c(a, 0, c1, c2, c3);
    r[0] = c1; c1 = 0;
    sqr_add_c2(a, 1, 0, c2, c3, c1);
    r[1] = c2; c2 = 0;
    sqr_add_c(a, 1, c3, c1, c2);
    sqr_add_c2(a, 2, 0, c3, c1, c2);
    r[2] = c3; c3 = 0;
    sqr_add_c2(a, 3, 0, c1, c2, c3);
    sqr_add_c2(a, 2, 1, c1, c2, c3);
    r[3] = c1; c1 = 0;
    sqr_add_c(a, 2, c2, c3, c1);
    sqr_add_c2(a, 3, 1, c2, c3, c1);
    r[4] = c2; c2 = 0;
    sqr_add_c2(a, 3, 2, c3, c1, c2);
    r[5] = c3; c3 = 0;
    sqr_add_c(a, 3, c1, c2, c3);
    r[6] = c1;
    r[7] = c2;
}

#else /* !BN_MUL_COMBA */

/* hmm... is it faster just to do a multiply? */
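/*
 * Without BN_MUL_COMBA the comba entry points are implemented via the generic
 * word routines instead: bn_mul_words() produces the first partial-product
 * row, each bn_mul_add_words() call accumulates one further row of the
 * schoolbook multiplication, and the returned carry word becomes the next
 * high word of r.  The squarings simply defer to bn_sqr_normal() with a
 * temporary array.
 */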
void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
{
    BN_ULONG t[8];
    bn_sqr_normal(r, a, 4, t);
}

void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
{
    BN_ULONG t[16];
    bn_sqr_normal(r, a, 8, t);
}

void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
    r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]);
    r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]);
    r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]);
}

void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    r[8]  = bn_mul_words(&(r[0]), a, 8, b[0]);
    r[9]  = bn_mul_add_words(&(r[1]), a, 8, b[1]);
    r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]);
    r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]);
    r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]);
    r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]);
    r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]);
    r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]);
}

#endif /* !BN_MUL_COMBA */