lib/mpi/mpih-mul.c

   1 /* mpihelp-mul.c  -  MPI helper functions
   2  * Copyright (C) 1994, 1996, 1998, 1999,
   3  *               2000 Free Software Foundation, Inc.
   4  *
   5  * This file is part of GnuPG.
   6  *
   7  * GnuPG is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 2 of the License, or
  10  * (at your option) any later version.
  11  *
  12  * GnuPG is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  20  *
  21  * Note: This code is heavily based on the GNU MP Library.
  22  *       Actually it's the same code with only minor changes in the
  23  *       way the data is stored; this is to support the abstraction
  24  *       of an optional secure memory allocation which may be used
  25  *       to avoid revealing of sensitive data due to paging etc.
  26  *       The GNU MP Library itself is published under the LGPL;
  27  *       however I decided to publish this code under the plain GPL.
  28  */
  29
  30 #include <linux/string.h>
  31 #include "mpi-internal.h"
  32 #include "longlong.h"
  33
  34 #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)          \
  35         do {                                                    \
  36                 if ((size) < KARATSUBA_THRESHOLD)               \
  37                         mul_n_basecase(prodp, up, vp, size);    \
  38                 else                                            \
  39                         mul_n(prodp, up, vp, size, tspace);     \
  40         } while (0);
  41
  42 #define MPN_SQR_N_RECURSE(prodp, up, size, tspace)              \
  43         do {                                                    \
  44                 if ((size) < KARATSUBA_THRESHOLD)               \
  45                         mpih_sqr_n_basecase(prodp, up, size);   \
  46                 else                                            \
  47                         mpih_sqr_n(prodp, up, size, tspace);    \
  48         } while (0);
  49
  50 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
  51  * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
  52  * always stored.  Return the most significant limb.
  53  *
  54  * Argument constraints:
  55  * 1. PRODP != UP and PRODP != VP, i.e. the destination
  56  *    must be distinct from the multiplier and the multiplicand.
  57  *
  58  *
  59  * Handle simple cases with traditional multiplication.
  60  *
  61  * This is the most critical code of multiplication.  All multiplies rely
  62  * on this, both small and huge.  Small ones arrive here immediately.  Huge
  63  * ones arrive here as this is the base case for Karatsuba's recursive
  64  * algorithm below.
  65  */
  66
  67 static mpi_limb_t
  68 mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
  69 {
  70         mpi_size_t i;
  71         mpi_limb_t cy;
  72         mpi_limb_t v_limb;
  73
  74         /* Multiply by the first limb in V separately, as the result can be
  75          * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  76         v_limb = vp[0];
  77         if (v_limb <= 1) {
  78                 if (v_limb == 1)
  79                         MPN_COPY(prodp, up, size);
  80                 else
  81                         MPN_ZERO(prodp, size);
  82                 cy = 0;
  83         } else
  84                 cy = mpihelp_mul_1(prodp, up, size, v_limb);
  85
  86         prodp[size] = cy;
  87         prodp++;
  88
  89         /* For each iteration in the outer loop, multiply one limb from
  90          * U with one limb from V, and add it to PROD.  */
  91         for (i = 1; i < size; i++) {
  92                 v_limb = vp[i];
  93                 if (v_limb <= 1) {
  94                         cy = 0;
  95                         if (v_limb == 1)
  96                                 cy = mpihelp_add_n(prodp, prodp, up, size);
  97                 } else
  98                         cy = mpihelp_addmul_1(prodp, up, size, v_limb);
  99
 100                 prodp[size] = cy;
 101                 prodp++;
 102         }
 103
 104         return cy;
 105 }
 106
 107 static void
 108 mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
 109                 mpi_size_t size, mpi_ptr_t tspace)
 110 {
 111         if (size & 1) {
 112                 /* The size is odd, and the code below doesn't handle that.
 113                  * Multiply the least significant (size - 1) limbs with a recursive
 114                  * call, and handle the most significant limb of S1 and S2
 115                  * separately.
 116                  * A slightly faster way to do this would be to make the Karatsuba
 117                  * code below behave as if the size were even, and let it check for
 118                  * odd size in the end.  I.e., in essence move this code to the end.
 119                  * Doing so would save us a recursive call, and potentially make the
 120                  * stack grow a lot less.
 121                  */
 122                 mpi_size_t esize = size - 1;    /* even size */
 123                 mpi_limb_t cy_limb;
 124
 125                 MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
 126                 cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
 127                 prodp[esize + esize] = cy_limb;
 128                 cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
 129                 prodp[esize + size] = cy_limb;
 130         } else {
 131                 /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
 132                  *
 133                  * Split U in two pieces, U1 and U0, such that
 134                  * U = U0 + U1*(B**n),
 135                  * and V in V1 and V0, such that
 136                  * V = V0 + V1*(B**n).
 137                  *
 138                  * UV is then computed recursively using the identity
 139                  *
 140                  *        2n   n          n                     n
 141                  * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
 142                  *                1 1        1  0   0  1              0 0
 143                  *
 144                  * Where B = 2**BITS_PER_MP_LIMB.
 145                  */
 146                 mpi_size_t hsize = size >> 1;
 147                 mpi_limb_t cy;
 148                 int negflg;
 149
 150                 /* Product H.      ________________  ________________
 151                  *                |_____U1 x V1____||____U0 x V0_____|
 152                  * Put result in upper part of PROD and pass low part of TSPACE
 153                  * as new TSPACE.
 154                  */
 155                 MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
 156                                   tspace);
 157
 158                 /* Product M.      ________________
 159                  *                |_(U1-U0)(V0-V1)_|
 160                  */
 161                 if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
 162                         mpihelp_sub_n(prodp, up + hsize, up, hsize);
 163                         negflg = 0;
 164                 } else {
 165                         mpihelp_sub_n(prodp, up, up + hsize, hsize);
 166                         negflg = 1;
 167                 }
 168                 if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
 169                         mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
 170                         negflg ^= 1;
 171                 } else {
 172                         mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
 173                         /* No change of NEGFLG.  */
 174                 }
 175                 /* Read temporary operands from low part of PROD.
 176                  * Put result in low part of TSPACE using upper part of TSPACE
 177                  * as new TSPACE.
 178                  */
 179                 MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
 180                                   tspace + size);
 181
 182                 /* Add/copy product H. */
 183                 MPN_COPY(prodp + hsize, prodp + size, hsize);
 184                 cy = mpihelp_add_n(prodp + size, prodp + size,
 185                                    prodp + size + hsize, hsize);
 186
 187                 /* Add product M (if NEGFLG M is a negative number) */
 188                 if (negflg)
 189                         cy -=
 190                             mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
 191                                           size);
 192                 else
 193                         cy +=
 194                             mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
 195                                           size);
 196
 197                 /* Product L.      ________________  ________________
 198                  *                |________________||____U0 x V0_____|
 199                  * Read temporary operands from low part of PROD.
 200                  * Put result in low part of TSPACE using upper part of TSPACE
 201                  * as new TSPACE.
 202                  */
 203                 MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
 204
 205                 /* Add/copy Product L (twice) */
 206
 207                 cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
 208                 if (cy)
 209                         mpihelp_add_1(prodp + hsize + size,
 210                                       prodp + hsize + size, hsize, cy);
 211
 212                 MPN_COPY(prodp, tspace, hsize);
 213                 cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
 214                                    hsize);
 215                 if (cy)
 216                         mpihelp_add_1(prodp + size, prodp + size, size, 1);
 217         }
 218 }
 219
 220 void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
 221 {
 222         mpi_size_t i;
 223         mpi_limb_t cy_limb;
 224         mpi_limb_t v_limb;
 225
 226         /* Multiply by the first limb in V separately, as the result can be
 227          * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
 228         v_limb = up[0];
 229         if (v_limb <= 1) {
 230                 if (v_limb == 1)
 231                         MPN_COPY(prodp, up, size);
 232                 else
 233                         MPN_ZERO(prodp, size);
 234                 cy_limb = 0;
 235         } else
 236                 cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
 237
 238         prodp[size] = cy_limb;
 239         prodp++;
 240
 241         /* For each iteration in the outer loop, multiply one limb from
 242          * U with one limb from V, and add it to PROD.  */
 243         for (i = 1; i < size; i++) {
 244                 v_limb = up[i];
 245                 if (v_limb <= 1) {
 246                         cy_limb = 0;
 247                         if (v_limb == 1)
 248                                 cy_limb = mpihelp_add_n(prodp, prodp, up, size);
 249                 } else
 250                         cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
 251
 252                 prodp[size] = cy_limb;
 253                 prodp++;
 254         }
 255 }
 256
 257 void
 258 mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
 259 {
 260         if (size & 1) {
 261                 /* The size is odd, and the code below doesn't handle that.
 262                  * Multiply the least significant (size - 1) limbs with a recursive
 263                  * call, and handle the most significant limb of S1 and S2
 264                  * separately.
 265                  * A slightly faster way to do this would be to make the Karatsuba
 266                  * code below behave as if the size were even, and let it check for
 267                  * odd size in the end.  I.e., in essence move this code to the end.
 268                  * Doing so would save us a recursive call, and potentially make the
 269                  * stack grow a lot less.
 270                  */
 271                 mpi_size_t esize = size - 1;    /* even size */
 272                 mpi_limb_t cy_limb;
 273
 274                 MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
 275                 cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
 276                 prodp[esize + esize] = cy_limb;
 277                 cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
 278
 279                 prodp[esize + size] = cy_limb;
 280         } else {
 281                 mpi_size_t hsize = size >> 1;
 282                 mpi_limb_t cy;
 283
 284                 /* Product H.      ________________  ________________
 285                  *                |_____U1 x U1____||____U0 x U0_____|
 286                  * Put result in upper part of PROD and pass low part of TSPACE
 287                  * as new TSPACE.
 288                  */
 289                 MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
 290
 291                 /* Product M.      ________________
 292                  *                |_(U1-U0)(U0-U1)_|
 293                  */
 294                 if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
 295                         mpihelp_sub_n(prodp, up + hsize, up, hsize);
 296                 else
 297                         mpihelp_sub_n(prodp, up, up + hsize, hsize);
 298
 299                 /* Read temporary operands from low part of PROD.
 300                  * Put result in low part of TSPACE using upper part of TSPACE
 301                  * as new TSPACE.  */
 302                 MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
 303
 304                 /* Add/copy product H  */
 305                 MPN_COPY(prodp + hsize, prodp + size, hsize);
 306                 cy = mpihelp_add_n(prodp + size, prodp + size,
 307                                    prodp + size + hsize, hsize);
 308
 309                 /* Add product M (if NEGFLG M is a negative number).  */
 310                 cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
 311
 312                 /* Product L.      ________________  ________________
 313                  *                |________________||____U0 x U0_____|
 314                  * Read temporary operands from low part of PROD.
 315                  * Put result in low part of TSPACE using upper part of TSPACE
 316                  * as new TSPACE.  */
 317                 MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
 318
 319                 /* Add/copy Product L (twice).  */
 320                 cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
 321                 if (cy)
 322                         mpihelp_add_1(prodp + hsize + size,
 323                                       prodp + hsize + size, hsize, cy);
 324
 325                 MPN_COPY(prodp, tspace, hsize);
 326                 cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
 327                                    hsize);
 328                 if (cy)
 329                         mpihelp_add_1(prodp + size, prodp + size, size, 1);
 330         }
 331 }
 332
 333 /* This should be made into an inline function in gmp.h.  */
 334 int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
 335 {
 336         if (up == vp) {
 337                 if (size < KARATSUBA_THRESHOLD)
 338                         mpih_sqr_n_basecase(prodp, up, size);
 339                 else {
 340                         mpi_ptr_t tspace;
 341                         tspace = mpi_alloc_limb_space(2 * size);
 342                         if (!tspace)
 343                                 return -ENOMEM;
 344                         mpih_sqr_n(prodp, up, size, tspace);
 345                         mpi_free_limb_space(tspace);
 346                 }
 347         } else {
 348                 if (size < KARATSUBA_THRESHOLD)
 349                         mul_n_basecase(prodp, up, vp, size);
 350                 else {
 351                         mpi_ptr_t tspace;
 352                         tspace = mpi_alloc_limb_space(2 * size);
 353                         if (!tspace)
 354                                 return -ENOMEM;
 355                         mul_n(prodp, up, vp, size, tspace);
 356                         mpi_free_limb_space(tspace);
 357                 }
 358         }
 359
 360         return 0;
 361 }
 362
 363 int
 364 mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
 365                            mpi_ptr_t up, mpi_size_t usize,
 366                            mpi_ptr_t vp, mpi_size_t vsize,
 367                            struct karatsuba_ctx *ctx)
 368 {
 369         mpi_limb_t cy;
 370
 371         if (!ctx->tspace || ctx->tspace_size < vsize) {
 372                 if (ctx->tspace)
 373                         mpi_free_limb_space(ctx->tspace);
 374                 ctx->tspace = mpi_alloc_limb_space(2 * vsize);
 375                 if (!ctx->tspace)
 376                         return -ENOMEM;
 377                 ctx->tspace_size = vsize;
 378         }
 379
 380         MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
 381
 382         prodp += vsize;
 383         up += vsize;
 384         usize -= vsize;
 385         if (usize >= vsize) {
 386                 if (!ctx->tp || ctx->tp_size < vsize) {
 387                         if (ctx->tp)
 388                                 mpi_free_limb_space(ctx->tp);
 389                         ctx->tp = mpi_alloc_limb_space(2 * vsize);
 390                         if (!ctx->tp) {
 391                                 if (ctx->tspace)
 392                                         mpi_free_limb_space(ctx->tspace);
 393                                 ctx->tspace = NULL;
 394                                 return -ENOMEM;
 395                         }
 396                         ctx->tp_size = vsize;
 397                 }
 398
 399                 do {
 400                         MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
 401                         cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
 402                         mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
 403                                       cy);
 404                         prodp += vsize;
 405                         up += vsize;
 406                         usize -= vsize;
 407                 } while (usize >= vsize);
 408         }
 409
 410         if (usize) {
 411                 if (usize < KARATSUBA_THRESHOLD) {
 412                         mpi_limb_t tmp;
 413                         if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
 414                             < 0)
 415                                 return -ENOMEM;
 416                 } else {
 417                         if (!ctx->next) {
 418                                 ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
 419                                 if (!ctx->next)
 420                                         return -ENOMEM;
 421                         }
 422                         if (mpihelp_mul_karatsuba_case(ctx->tspace,
 423                                                        vp, vsize,
 424                                                        up, usize,
 425                                                        ctx->next) < 0)
 426                                 return -ENOMEM;
 427                 }
 428
 429                 cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
 430                 mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
 431         }
 432
 433         return 0;
 434 }
 435
 436 void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
 437 {
 438         struct karatsuba_ctx *ctx2;
 439
 440         if (ctx->tp)
 441                 mpi_free_limb_space(ctx->tp);
 442         if (ctx->tspace)
 443                 mpi_free_limb_space(ctx->tspace);
 444         for (ctx = ctx->next; ctx; ctx = ctx2) {
 445                 ctx2 = ctx->next;
 446                 if (ctx->tp)
 447                         mpi_free_limb_space(ctx->tp);
 448                 if (ctx->tspace)
 449                         mpi_free_limb_space(ctx->tspace);
 450                 kfree(ctx);
 451         }
 452 }
 453
 454 /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
 455  * and v (pointed to by VP, with VSIZE limbs), and store the result at
 456  * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
 457  * operands are normalized.  Return the most significant limb of the
 458  * result.
 459  *
 460  * NOTE: The space pointed to by PRODP is overwritten before finished
 461  * with U and V, so overlap is an error.
 462  *
 463  * Argument constraints:
 464  * 1. USIZE >= VSIZE.
 465  * 2. PRODP != UP and PRODP != VP, i.e. the destination
 466  *    must be distinct from the multiplier and the multiplicand.
 467  */
 468
 469 int
 470 mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
 471             mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
 472 {
 473         mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
 474         mpi_limb_t cy;
 475         struct karatsuba_ctx ctx;
 476
 477         if (vsize < KARATSUBA_THRESHOLD) {
 478                 mpi_size_t i;
 479                 mpi_limb_t v_limb;
 480
 481                 if (!vsize) {
 482                         *_result = 0;
 483                         return 0;
 484                 }
 485
 486                 /* Multiply by the first limb in V separately, as the result can be
 487                  * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
 488                 v_limb = vp[0];
 489                 if (v_limb <= 1) {
 490                         if (v_limb == 1)
 491                                 MPN_COPY(prodp, up, usize);
 492                         else
 493                                 MPN_ZERO(prodp, usize);
 494                         cy = 0;
 495                 } else
 496                         cy = mpihelp_mul_1(prodp, up, usize, v_limb);
 497
 498                 prodp[usize] = cy;
 499                 prodp++;
 500
 501                 /* For each iteration in the outer loop, multiply one limb from
 502                  * U with one limb from V, and add it to PROD.  */
 503                 for (i = 1; i < vsize; i++) {
 504                         v_limb = vp[i];
 505                         if (v_limb <= 1) {
 506                                 cy = 0;
 507                                 if (v_limb == 1)
 508                                         cy = mpihelp_add_n(prodp, prodp, up,
 509                                                            usize);
 510                         } else
 511                                 cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
 512
 513                         prodp[usize] = cy;
 514                         prodp++;
 515                 }
 516
 517                 *_result = cy;
 518                 return 0;
 519         }
 520
 521         memset(&ctx, 0, sizeof ctx);
 522         if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
 523                 return -ENOMEM;
 524         mpihelp_release_karatsuba_ctx(&ctx);
 525         *_result = *prod_endp;
 526         return 0;
 527 }