Merge pull request #578 from PX4/fix_mp_prime_strong_lucas_lefridge_compilation
[libtommath.git] / s_mp_exptmod_fast.c
blobe7729f49d9ea325586ba419a166fd2d191ad54e7
1 #include "tommath_private.h"
2 #ifdef S_MP_EXPTMOD_FAST_C
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
4 /* SPDX-License-Identifier: Unlicense */
6 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
8 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
9 * The value of k changes based on the size of the exponent.
11 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
14 #ifdef MP_LOW_MEM
15 # define TAB_SIZE 32
16 # define MAX_WINSIZE 5
17 #else
18 # define TAB_SIZE 256
19 # define MAX_WINSIZE 0
20 #endif
22 mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)
24 mp_int M[TAB_SIZE], res;
25 mp_digit buf, mp;
26 int bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
27 mp_err err;
29 /* use a pointer to the reduction algorithm. This allows us to use
30 * one of many reduction algorithms without modding the guts of
31 * the code with if statements everywhere.
33 mp_err(*redux)(mp_int *x, const mp_int *n, mp_digit rho);
35 /* find window size */
36 x = mp_count_bits(X);
37 if (x <= 7) {
38 winsize = 2;
39 } else if (x <= 36) {
40 winsize = 3;
41 } else if (x <= 140) {
42 winsize = 4;
43 } else if (x <= 450) {
44 winsize = 5;
45 } else if (x <= 1303) {
46 winsize = 6;
47 } else if (x <= 3529) {
48 winsize = 7;
49 } else {
50 winsize = 8;
53 winsize = MAX_WINSIZE ? MP_MIN(MAX_WINSIZE, winsize) : winsize;
55 /* init M array */
56 /* init first cell */
57 if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
58 return err;
61 /* now init the second half of the array */
62 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
63 if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
64 for (y = 1<<(winsize-1); y < x; y++) {
65 mp_clear(&M[y]);
67 mp_clear(&M[1]);
68 return err;
72 /* determine and setup reduction code */
73 if (redmode == 0) {
74 if (MP_HAS(MP_MONTGOMERY_SETUP)) {
75 /* now setup montgomery */
76 if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) goto LBL_M;
77 } else {
78 err = MP_VAL;
79 goto LBL_M;
82 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
83 if (MP_HAS(S_MP_MONTGOMERY_REDUCE_COMBA) &&
84 (((P->used * 2) + 1) < MP_WARRAY) &&
85 (P->used < MP_MAX_COMBA)) {
86 redux = s_mp_montgomery_reduce_comba;
87 } else if (MP_HAS(MP_MONTGOMERY_REDUCE)) {
88 /* use slower baseline Montgomery method */
89 redux = mp_montgomery_reduce;
90 } else {
91 err = MP_VAL;
92 goto LBL_M;
94 } else if (redmode == 1) {
95 if (MP_HAS(MP_DR_SETUP) && MP_HAS(MP_DR_REDUCE)) {
96 /* setup DR reduction for moduli of the form B**k - b */
97 mp_dr_setup(P, &mp);
98 redux = mp_dr_reduce;
99 } else {
100 err = MP_VAL;
101 goto LBL_M;
103 } else if (MP_HAS(MP_REDUCE_2K_SETUP) && MP_HAS(MP_REDUCE_2K)) {
104 /* setup DR reduction for moduli of the form 2**k - b */
105 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) goto LBL_M;
106 redux = mp_reduce_2k;
107 } else {
108 err = MP_VAL;
109 goto LBL_M;
112 /* setup result */
113 if ((err = mp_init_size(&res, P->alloc)) != MP_OKAY) goto LBL_M;
115 /* create M table
119 * The first half of the table is not computed though accept for M[0] and M[1]
122 if (redmode == 0) {
123 if (MP_HAS(MP_MONTGOMERY_CALC_NORMALIZATION)) {
124 /* now we need R mod m */
125 if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) goto LBL_RES;
127 /* now set M[1] to G * R mod m */
128 if ((err = mp_mulmod(G, &res, P, &M[1])) != MP_OKAY) goto LBL_RES;
129 } else {
130 err = MP_VAL;
131 goto LBL_RES;
133 } else {
134 mp_set(&res, 1uL);
135 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) goto LBL_RES;
138 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
139 if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES;
141 for (x = 0; x < (winsize - 1); x++) {
142 if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES;
143 if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, mp)) != MP_OKAY) goto LBL_RES;
146 /* create upper table */
147 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
148 if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) goto LBL_RES;
149 if ((err = redux(&M[x], P, mp)) != MP_OKAY) goto LBL_RES;
152 /* set initial mode and bit cnt */
153 mode = 0;
154 bitcnt = 1;
155 buf = 0;
156 digidx = X->used - 1;
157 bitcpy = 0;
158 bitbuf = 0;
160 for (;;) {
161 /* grab next digit as required */
162 if (--bitcnt == 0) {
163 /* if digidx == -1 we are out of digits so break */
164 if (digidx == -1) {
165 break;
167 /* read next digit and reset bitcnt */
168 buf = X->dp[digidx--];
169 bitcnt = (int)MP_DIGIT_BIT;
172 /* grab the next msb from the exponent */
173 y = (mp_digit)(buf >> (MP_DIGIT_BIT - 1)) & 1uL;
174 buf <<= (mp_digit)1;
176 /* if the bit is zero and mode == 0 then we ignore it
177 * These represent the leading zero bits before the first 1 bit
178 * in the exponent. Technically this opt is not required but it
179 * does lower the # of trivial squaring/reductions used
181 if ((mode == 0) && (y == 0)) {
182 continue;
185 /* if the bit is zero and mode == 1 then we square */
186 if ((mode == 1) && (y == 0)) {
187 if ((err = mp_sqr(&res, &res)) != MP_OKAY) goto LBL_RES;
188 if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
189 continue;
192 /* else we add it to the window */
193 bitbuf |= (y << (winsize - ++bitcpy));
194 mode = 2;
196 if (bitcpy == winsize) {
197 /* ok window is filled so square as required and multiply */
198 /* square first */
199 for (x = 0; x < winsize; x++) {
200 if ((err = mp_sqr(&res, &res)) != MP_OKAY) goto LBL_RES;
201 if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
204 /* then multiply */
205 if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) goto LBL_RES;
206 if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
208 /* empty window and reset */
209 bitcpy = 0;
210 bitbuf = 0;
211 mode = 1;
215 /* if bits remain then square/multiply */
216 if ((mode == 2) && (bitcpy > 0)) {
217 /* square then multiply if the bit is set */
218 for (x = 0; x < bitcpy; x++) {
219 if ((err = mp_sqr(&res, &res)) != MP_OKAY) goto LBL_RES;
220 if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
222 /* get next bit of the window */
223 bitbuf <<= 1;
224 if ((bitbuf & (1 << winsize)) != 0) {
225 /* then multiply */
226 if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) goto LBL_RES;
227 if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
232 if (redmode == 0) {
233 /* fixup result if Montgomery reduction is used
234 * recall that any value in a Montgomery system is
235 * actually multiplied by R mod n. So we have
236 * to reduce one more time to cancel out the factor
237 * of R.
239 if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
242 /* swap res with Y */
243 mp_exch(&res, Y);
244 err = MP_OKAY;
245 LBL_RES:
246 mp_clear(&res);
247 LBL_M:
248 mp_clear(&M[1]);
249 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
250 mp_clear(&M[x]);
252 return err;
254 #endif