Bump actions/upload-artifacts version
[libtommath.git] / s_mp_sqr.c
blobda9aa69ce8d73af51cc27d01c318808fe495fc42
1 #include "tommath_private.h"
2 #ifdef S_MP_SQR_C
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
4 /* SPDX-License-Identifier: Unlicense */
6 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
7 mp_err s_mp_sqr(const mp_int *a, mp_int *b)
9 mp_int t;
10 int ix, pa;
11 mp_err err;
13 pa = a->used;
14 if ((err = mp_init_size(&t, (2 * pa) + 1)) != MP_OKAY) {
15 return err;
18 /* default used is maximum possible size */
19 t.used = (2 * pa) + 1;
21 for (ix = 0; ix < pa; ix++) {
22 mp_digit u;
23 int iy;
25 /* first calculate the digit at 2*ix */
26 /* calculate double precision result */
27 mp_word r = (mp_word)t.dp[2*ix] +
28 ((mp_word)a->dp[ix] * (mp_word)a->dp[ix]);
30 /* store lower part in result */
31 t.dp[ix+ix] = (mp_digit)(r & (mp_word)MP_MASK);
33 /* get the carry */
34 u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
36 for (iy = ix + 1; iy < pa; iy++) {
37 /* first calculate the product */
38 r = (mp_word)a->dp[ix] * (mp_word)a->dp[iy];
40 /* now calculate the double precision result, note we use
41 * addition instead of *2 since it's easier to optimize.
43 /* Some architectures and/or compilers seem to prefer a bit-shift nowadays */
44 r = (mp_word)t.dp[ix + iy] + (r<<1) + (mp_word)u;
46 /* store lower part */
47 t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK);
49 /* get carry */
50 u = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
52 /* propagate upwards */
53 while (u != 0uL) {
54 mp_digit tmp;
56 "u" can get bigger than MP_DIGIT_MAX and would need a bigger type
57 for the sum (mp_word). That is costly if mp_word is not a native
58 integer but a bigint from the compiler library. We do a manual
59 multiword addition instead.
61 /* t.dp[ix + iy] has been masked off by MP_MASK and is hence of the correct size
62 and we can just add the lower part of "u". Carry is guaranteed to fit into
63 the type used for mp_digit, too, so we can extract it later. */
64 tmp = t.dp[ix + iy] + (u & MP_MASK);
65 /* t.dp[ix + iy] is set to the result minus the carry, carry is still in "tmp" */
66 t.dp[ix + iy] = tmp & MP_MASK;
67 /* Add high part of "u" and the carry from "tmp" to get the next "u" */
68 u = (u >> MP_DIGIT_BIT) + (tmp >> MP_DIGIT_BIT);
69 ++iy;
73 mp_clamp(&t);
74 mp_exch(&t, b);
75 mp_clear(&t);
76 return MP_OKAY;
78 #endif