s_mp_sqr.c

   1 #include "tommath_private.h"
   2 #ifdef S_MP_SQR_C
   3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
   4 /* SPDX-License-Identifier: Unlicense */
   5
   6 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
   7 mp_err s_mp_sqr(const mp_int *a, mp_int *b)
   8 {
   9    mp_int   t;
  10    int      ix, pa;
  11    mp_err   err;
  12
  13    pa = a->used;
  14    if ((err = mp_init_size(&t, (2 * pa) + 1)) != MP_OKAY) {
  15       return err;
  16    }
  17
  18    /* default used is maximum possible size */
  19    t.used = (2 * pa) + 1;
  20
  21    for (ix = 0; ix < pa; ix++) {
  22       mp_digit u;
  23       int iy;
  24
  25       /* first calculate the digit at 2*ix */
  26       /* calculate double precision result */
  27       mp_word r = (mp_word)t.dp[2*ix] +
  28                   ((mp_word)a->dp[ix] * (mp_word)a->dp[ix]);
  29
  30       /* store lower part in result */
  31       t.dp[ix+ix] = (mp_digit)(r & (mp_word)MP_MASK);
  32
  33       /* get the carry */
  34       u           = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
  35
  36       for (iy = ix + 1; iy < pa; iy++) {
  37          /* first calculate the product */
  38          r       = (mp_word)a->dp[ix] * (mp_word)a->dp[iy];
  39
  40          /* now calculate the double precision result, note we use
  41           * addition instead of *2 since it's easier to optimize.
  42           */
  43          /* Some architectures and/or compilers seem to prefer a bit-shift nowadays */
  44          r       = (mp_word)t.dp[ix + iy] + (r<<1) + (mp_word)u;
  45
  46          /* store lower part */
  47          t.dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK);
  48
  49          /* get carry */
  50          u       = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
  51       }
  52       /* propagate upwards */
  53       while (u != 0uL) {
  54          mp_digit tmp;
  55          /*
  56             "u" can get bigger than MP_DIGIT_MAX and would need a bigger type
  57             for the sum (mp_word). That is costly if mp_word is not a native
  58             integer but a bigint from the compiler library. We do a manual
  59             multiword addition instead.
  60           */
  61          /* t.dp[ix + iy] has been masked off by MP_MASK and is hence of the correct size
  62             and we can just add the lower part of "u". Carry is guaranteed to fit into
  63             the type used for mp_digit, too, so we can extract it later. */
  64          tmp = t.dp[ix + iy] + (u & MP_MASK);
  65          /* t.dp[ix + iy] is set to the result minus the carry, carry is still in "tmp" */
  66          t.dp[ix + iy] = tmp & MP_MASK;
  67          /* Add high part of "u" and the carry from "tmp" to get the next "u" */
  68          u = (u >> MP_DIGIT_BIT) + (tmp >> MP_DIGIT_BIT);
  69          ++iy;
  70       }
  71    }
  72
  73    mp_clamp(&t);
  74    mp_exch(&t, b);
  75    mp_clear(&t);
  76    return MP_OKAY;
  77 }
  78 #endif