usr/src/lib/libc/port/fp/__x_power.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28
  29 #include "lint.h"
  30 #include "base_conversion.h"
  31 #include <sys/types.h>
  32 #include <malloc.h>
  33 #include <memory.h>
  34 #include <stdlib.h>
  35 #include <errno.h>
  36
  37 /*
  38  * Multiply a _big_float by a power of two or ten
  39  */
  40
  41 /* see comment in double_decim.c */
  42 static unsigned int
  43 __quorem10000(unsigned int x, unsigned short *pr)
  44 {
  45         *pr = x % 10000;
  46         return (x / 10000);
  47 }
  48
  49 /*
  50  * Multiply a base-2**16 significand by multiplier.  Extend length as
  51  * necessary to accommodate carries.
  52  */
  53 static void
  54 __multiply_base_two(_big_float *pbf, unsigned short multiplier)
  55 {
  56         unsigned int    p, carry;
  57         int             j, length = pbf->blength;
  58
  59         carry = 0;
  60         for (j = 0; j < length; j++) {
  61                 p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
  62                 pbf->bsignificand[j] = p & 0xffff;
  63                 carry = p >> 16;
  64         }
  65         if (carry != 0)
  66                 pbf->bsignificand[j++] = carry;
  67         pbf->blength = j;
  68 }
  69
  70 /*
  71  * Multiply a base-10**4 significand by multiplier.  Extend length as
  72  * necessary to accommodate carries.
  73  */
  74 static void
  75 __multiply_base_ten(_big_float *pbf, unsigned short multiplier)
  76 {
  77         unsigned int    p, carry;
  78         int             j, length = pbf->blength;
  79
  80         carry = 0;
  81         for (j = 0; j < length; j++) {
  82                 p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
  83                 carry = __quorem10000(p, &pbf->bsignificand[j]);
  84         }
  85         while (carry != 0) {
  86                 carry = __quorem10000(carry, &pbf->bsignificand[j]);
  87                 j++;
  88         }
  89         pbf->blength = j;
  90 }
  91
  92 /*
  93  * Multiply a base-10**4 significand by 2**multiplier.  Extend length
  94  * as necessary to accommodate carries.
  95  */
  96 static void
  97 __multiply_base_ten_by_two(_big_float *pbf, unsigned short multiplier)
  98 {
  99         unsigned int    p, carry;
 100         int             j, length = pbf->blength;
 101
 102         carry = 0;
 103         for (j = 0; j < length; j++) {
 104                 p = ((unsigned int)pbf->bsignificand[j] << multiplier) + carry;
 105                 carry = __quorem10000(p, &pbf->bsignificand[j]);
 106         }
 107         while (carry != 0) {
 108                 carry = __quorem10000(carry, &pbf->bsignificand[j]);
 109                 j++;
 110         }
 111         pbf->blength = j;
 112 }
 113
 114 /*
 115  * Propagate carries in a base-2**16 significand.
 116  */
 117 static void
 118 __carry_propagate_two(unsigned int carry, unsigned short *psignificand)
 119 {
 120         unsigned int    p;
 121         int             j;
 122
 123         j = 0;
 124         while (carry != 0) {
 125                 p = psignificand[j] + carry;
 126                 psignificand[j++] = p & 0xffff;
 127                 carry = p >> 16;
 128         }
 129 }
 130
 131 /*
 132  * Propagate carries in a base-10**4 significand.
 133  */
 134 static void
 135 __carry_propagate_ten(unsigned int carry, unsigned short *psignificand)
 136 {
 137         unsigned int    p;
 138         int             j;
 139
 140         j = 0;
 141         while (carry != 0) {
 142                 p = psignificand[j] + carry;
 143                 carry = __quorem10000(p, &psignificand[j]);
 144                 j++;
 145         }
 146 }
 147
 148 /*
 149  * Given x[] and y[], base-2**16 vectors of length n, compute the
 150  * dot product
 151  *
 152  * sum (i=0,n-1) of x[i]*y[n-1-i]
 153  *
 154  * The product may fill as many as three base-2**16 digits; product[0]
 155  * is the least significant, product[2] the most.
 156  */
 157 static void
 158 __multiply_base_two_vector(unsigned short n, unsigned short *px,
 159     unsigned short *py, unsigned short *product)
 160 {
 161
 162         unsigned int    acc, p;
 163         unsigned short  carry;
 164         int             i;
 165
 166         acc = 0;
 167         carry = 0;
 168         for (i = 0; i < (int)n; i++) {
 169                 p = px[i] * py[n - 1 - i] + acc;
 170                 if (p < acc)
 171                         carry++;
 172                 acc = p;
 173         }
 174         product[0] = acc & 0xffff;
 175         product[1] = acc >> 16;
 176         product[2] = carry;
 177 }
 178
 179 /*
 180  * Given x[] and y[], base-10**4 vectors of length n, compute the
 181  * dot product
 182  *
 183  * sum (i=0,n-1) of x[i]*y[n-1-i]
 184  *
 185  * The product may fill as many as three base-10**4 digits; product[0]
 186  * is the least significant, product[2] the most.
 187  */
 188 #define ABASE   3000000000u     /* base of accumulator */
 189
 190 static void
 191 __multiply_base_ten_vector(unsigned short n, unsigned short *px,
 192     unsigned short *py, unsigned short *product)
 193 {
 194
 195         unsigned int    acc;
 196         unsigned short  carry;
 197         int             i;
 198
 199         acc = 0;
 200         carry = 0;
 201         for (i = 0; i < (int)n; i++) {
 202                 acc = px[i] * py[n - 1 - i] + acc;
 203                 if (acc >= ABASE) {
 204                         carry++;
 205                         acc -= ABASE;
 206                 }
 207         }
 208         product[0] = acc % 10000;
 209         acc = acc / 10000;
 210         product[1] = acc % 10000;
 211         acc = acc / 10000;
 212         product[2] = acc + (ABASE / 100000000) * carry;
 213 }
 214
 215 /*
 216  * Multiply *pbf by the n-th power of mult, which must be two or
 217  * ten.  If mult is two, *pbf is assumed to be base 10**4; if mult
 218  * is ten, *pbf is assumed to be base 2**16.  precision specifies
 219  * the number of significant bits or decimal digits required in the
 220  * result.  (The product may have more or fewer digits than this,
 221  * but it will be accurate to at least this many.)
 222  *
 223  * On exit, if the product is small enough, it overwrites *pbf, and
 224  * *pnewbf is set to pbf.  If the product is too large to fit in *pbf,
 225  * this routine calls malloc(3M) to allocate storage and sets *pnewbf
 226  * to point to this area; it is the caller's responsibility to free
 227  * this storage when it is no longer needed.  Note that *pbf may be
 228  * modified even when the routine allocates new storage.
 229  *
 230  * If n is too large, we set errno to ERANGE and call abort(3C).
 231  * If an attempted malloc fails, we set errno to ENOMEM and call
 232  * abort(3C).
 233  */
 234 void
 235 __big_float_times_power(_big_float *pbf, int mult, int n, int precision,
 236     _big_float **pnewbf)
 237 {
 238         int             base, needed_precision, productsize;
 239         int             i, j, itlast, trailing_zeros_to_delete;
 240         int             tablepower[3], length[3];
 241         int             lengthx, lengthp, istart, istop;
 242         int             excess_check;
 243         unsigned short  *pp, *table[3], canquit;
 244         unsigned short  multiplier, product[3];
 245
 246         if (pbf->blength == 0) {
 247                 *pnewbf = pbf;
 248                 return;
 249         }
 250
 251         if (mult == 2) {
 252                 /*
 253                  * Handle small n cases that don't require extra
 254                  * storage quickly.
 255                  */
 256                 if (n <= 16 && pbf->blength + 2 < pbf->bsize) {
 257                         __multiply_base_ten_by_two(pbf, n);
 258                         *pnewbf = pbf;
 259                         return;
 260                 }
 261
 262                 /* *pbf is base 10**4 */
 263                 base = 10;
 264
 265                 /*
 266                  * Convert precision (base ten digits) to needed_precision
 267                  * (base 10**4 digits), allowing an additional digit at
 268                  * each end.
 269                  */
 270                 needed_precision = 2 + (precision >> 2);
 271
 272                 /*
 273                  * Set up pointers to the table entries and compute their
 274                  * lengths.
 275                  */
 276                 if (n < __TBL_2_SMALL_SIZE) {
 277                         itlast = 0;
 278                         tablepower[0] = n;
 279                         tablepower[1] = tablepower[2] = 0;
 280                 } else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE)) {
 281                         itlast = 1;
 282                         tablepower[0] = n % __TBL_2_SMALL_SIZE;
 283                         tablepower[1] = n / __TBL_2_SMALL_SIZE;
 284                         tablepower[2] = 0;
 285                 } else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE *
 286                     __TBL_2_HUGE_SIZE)) {
 287                         itlast = 2;
 288                         tablepower[0] = n % __TBL_2_SMALL_SIZE;
 289                         n /= __TBL_2_SMALL_SIZE;
 290                         tablepower[1] = n % __TBL_2_BIG_SIZE;
 291                         tablepower[2] = n / __TBL_2_BIG_SIZE;
 292                 } else {
 293                         errno = ERANGE;
 294                         abort();
 295                 }
 296                 pp = (unsigned short *)__tbl_2_small_start + tablepower[0];
 297                 table[0] = (unsigned short *)__tbl_2_small_digits + pp[0];
 298                 length[0] = pp[1] - pp[0];
 299                 pp = (unsigned short *)__tbl_2_big_start + tablepower[1];
 300                 table[1] = (unsigned short *)__tbl_2_big_digits + pp[0];
 301                 length[1] = pp[1] - pp[0];
 302                 pp = (unsigned short *)__tbl_2_huge_start + tablepower[2];
 303                 table[2] = (unsigned short *)__tbl_2_huge_digits + pp[0];
 304                 length[2] = pp[1] - pp[0];
 305         } else {
 306                 if (n <= 4 && pbf->blength + 1 < pbf->bsize) {
 307                         pbf->bexponent += (short)n;
 308                         __multiply_base_two(pbf, __tbl_10_small_digits[n]);
 309                         *pnewbf = pbf;
 310                         return;
 311                 }
 312
 313                 /* *pbf is base 2**16 */
 314                 base = 2;
 315                 pbf->bexponent += (short)n; /* now need to multiply by 5**n */
 316                 needed_precision = 2 + (precision >> 4);
 317                 if (n < __TBL_10_SMALL_SIZE) {
 318                         itlast = 0;
 319                         tablepower[0] = n;
 320                         tablepower[1] = tablepower[2] = 0;
 321                 } else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE)) {
 322                         itlast = 1;
 323                         tablepower[0] = n % __TBL_10_SMALL_SIZE;
 324                         tablepower[1] = n / __TBL_10_SMALL_SIZE;
 325                         tablepower[2] = 0;
 326                 } else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE *
 327                     __TBL_10_HUGE_SIZE)) {
 328                         itlast = 2;
 329                         tablepower[0] = n % __TBL_10_SMALL_SIZE;
 330                         n /= __TBL_10_SMALL_SIZE;
 331                         tablepower[1] = n % __TBL_10_BIG_SIZE;
 332                         tablepower[2] = n / __TBL_10_BIG_SIZE;
 333                 } else {
 334                         errno = ERANGE;
 335                         abort();
 336                 }
 337                 pp = (unsigned short *)__tbl_10_small_start + tablepower[0];
 338                 table[0] = (unsigned short *)__tbl_10_small_digits + pp[0];
 339                 length[0] = pp[1] - pp[0];
 340                 pp = (unsigned short *)__tbl_10_big_start + tablepower[1];
 341                 table[1] = (unsigned short *)__tbl_10_big_digits + pp[0];
 342                 length[1] = pp[1] - pp[0];
 343                 pp = (unsigned short *)__tbl_10_huge_start + tablepower[2];
 344                 table[2] = (unsigned short *)__tbl_10_huge_digits + pp[0];
 345                 length[2] = pp[1] - pp[0];
 346         }
 347
 348         /* compute an upper bound on the size of the product */
 349         productsize = pbf->blength;
 350         for (i = 0; i <= itlast; i++)
 351                 productsize += length[i];
 352
 353         if (productsize < needed_precision)
 354                 needed_precision = productsize;
 355
 356         if (productsize <= pbf->bsize) {
 357                 *pnewbf = pbf;
 358         } else {
 359                 i = sizeof (_big_float) + sizeof (unsigned short) *
 360                     (productsize - _BIG_FLOAT_SIZE);
 361                 if ((*pnewbf = malloc(i)) == NULL) {
 362                         errno = ENOMEM;
 363                         abort();
 364                 }
 365                 (void) memcpy((*pnewbf)->bsignificand, pbf->bsignificand,
 366                     pbf->blength * sizeof (unsigned short));
 367                 (*pnewbf)->blength = pbf->blength;
 368                 (*pnewbf)->bexponent = pbf->bexponent;
 369                 pbf = *pnewbf;
 370                 pbf->bsize = productsize;
 371         }
 372
 373         /*
 374          * Now pbf points to the input and the output.  Step through
 375          * each level of the tables.
 376          */
 377         for (i = 0; i <= itlast; i++) {
 378                 if (tablepower[i] == 0)
 379                         continue;
 380
 381                 lengthp = length[i];
 382                 if (lengthp == 1) {
 383                         /* short multiplier (<= 10**4 or 2**13) */
 384                         if (base == 10) {
 385                                 /* left shift by tablepower[i] */
 386                                 __multiply_base_ten_by_two(pbf, tablepower[i]);
 387                         } else {
 388                                 __multiply_base_two(pbf, (table[i])[0]);
 389                         }
 390                         continue;
 391                 }
 392
 393                 lengthx = pbf->blength;
 394                 if (lengthx == 1) {
 395                         /* short multiplicand */
 396                         multiplier = pbf->bsignificand[0];
 397                         (void) memcpy(pbf->bsignificand, table[i],
 398                             lengthp * sizeof (unsigned short));
 399                         pbf->blength = lengthp;
 400                         if (base == 10)
 401                                 __multiply_base_ten(pbf, multiplier);
 402                         else
 403                                 __multiply_base_two(pbf, multiplier);
 404                         continue;
 405                 }
 406
 407                 /* keep track of trailing zeroes */
 408                 trailing_zeros_to_delete = 0;
 409
 410                 /* initialize for carry propagation */
 411                 pbf->bsignificand[lengthx + lengthp - 1] = 0;
 412
 413                 /*
 414                  * General case - the result will be accumulated in *pbf
 415                  * from most significant digit to least significant.
 416                  */
 417                 for (j = lengthx + lengthp - 2; j >= 0; j--) {
 418                         istart = j - lengthp + 1;
 419                         if (istart < 0)
 420                                 istart = 0;
 421
 422                         istop = lengthx - 1;
 423                         if (istop > j)
 424                                 istop = j;
 425
 426                         pp = table[i];
 427                         if (base == 2) {
 428                                 __multiply_base_two_vector(istop - istart + 1,
 429                                     &(pbf->bsignificand[istart]),
 430                                     &(pp[j - istop]), product);
 431                                 if (product[2] != 0)
 432                                         __carry_propagate_two(
 433                                             (unsigned int)product[2],
 434                                             &(pbf->bsignificand[j + 2]));
 435                                 if (product[1] != 0)
 436                                         __carry_propagate_two(
 437                                             (unsigned int)product[1],
 438                                             &(pbf->bsignificand[j + 1]));
 439                         } else {
 440                                 __multiply_base_ten_vector(istop - istart + 1,
 441                                     &(pbf->bsignificand[istart]),
 442                                     &(pp[j - istop]), product);
 443                                 if (product[2] != 0)
 444                                         __carry_propagate_ten(
 445                                             (unsigned int)product[2],
 446                                             &(pbf->bsignificand[j + 2]));
 447                                 if (product[1] != 0)
 448                                         __carry_propagate_ten(
 449                                             (unsigned int)product[1],
 450                                             &(pbf->bsignificand[j + 1]));
 451                         }
 452                         pbf->bsignificand[j] = product[0];
 453                         if (i < itlast || j > lengthx + lengthp - 4
 454                             - needed_precision)
 455                                 continue;
 456
 457                         /*
 458                          * On the last multiplication, it's not necessary
 459                          * to develop the entire product if further digits
 460                          * can't possibly affect significant digits.  But
 461                          * note that further digits can affect the product
 462                          * in one of two ways: (i) the sum of digits beyond
 463                          * the significant ones can cause a carry that would
 464                          * propagate into the significant digits, or (ii) no
 465                          * carry will occur, but there may be more nonzero
 466                          * digits that will need to be recorded in a sticky
 467                          * bit.
 468                          */
 469                         excess_check = lengthx + lengthp;
 470                         if (pbf->bsignificand[excess_check - 1] == 0)
 471                                 excess_check--;
 472                         excess_check -= needed_precision + 4;
 473                         canquit = ((base == 2)? 65535 : 9999) -
 474                             ((lengthx < lengthp)? lengthx : lengthp);
 475                         /*
 476                          * If j <= excess_check, then we have all the
 477                          * significant digits.  If the (j + 1)-st digit
 478                          * is no larger than canquit, then the sum of the
 479                          * digits not yet computed can't carry into the
 480                          * significant digits.  If the j-th and (j + 1)-st
 481                          * digits are not both zero, then we know we are
 482                          * discarding nonzero digits.  (If both of these
 483                          * digits are zero, we need to keep forming more
 484                          * of the product to see whether or not there are
 485                          * any more nonzero digits.)
 486                          */
 487                         if (j <= excess_check &&
 488                             pbf->bsignificand[j + 1] <= canquit &&
 489                             (pbf->bsignificand[j + 1] | pbf->bsignificand[j])
 490                             != 0) {
 491                                 /* can discard j+1, j, ... 0 */
 492                                 trailing_zeros_to_delete = j + 2;
 493
 494                                 /* set sticky bit */
 495                                 pbf->bsignificand[j + 2] |= 1;
 496                                 break;
 497                         }
 498                 }
 499
 500                 /* if the product didn't carry, delete the leading zero */
 501                 pbf->blength = lengthx + lengthp;
 502                 if (pbf->bsignificand[pbf->blength - 1] == 0)
 503                         pbf->blength--;
 504
 505                 /* look for additional trailing zeros to delete */
 506                 for (; pbf->bsignificand[trailing_zeros_to_delete] == 0;
 507                     trailing_zeros_to_delete++)
 508                         continue;
 509
 510                 if (trailing_zeros_to_delete > 0) {
 511                         for (j = 0; j < (int)pbf->blength -
 512                             trailing_zeros_to_delete; j++) {
 513                                 pbf->bsignificand[j] = pbf->bsignificand[j
 514                                     + trailing_zeros_to_delete];
 515                         }
 516                         pbf->blength -= trailing_zeros_to_delete;
 517                         pbf->bexponent += trailing_zeros_to_delete <<
 518                             ((base == 2)? 4 : 2);
 519                 }
 520         }
 521 }