import less(1)
[unleashed/tickless.git] / usr / src / lib / libc / port / fp / __x_power.c
blob137f2a19e53353b8dcfb8263d25f9b56bba1a9f9
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
29 #include "lint.h"
30 #include "base_conversion.h"
31 #include <sys/types.h>
32 #include <malloc.h>
33 #include <memory.h>
34 #include <stdlib.h>
35 #include <errno.h>
38 * Multiply a _big_float by a power of two or ten
41 /* see comment in double_decim.c */
42 static unsigned int
43 __quorem10000(unsigned int x, unsigned short *pr)
45 *pr = x % 10000;
46 return (x / 10000);
50 * Multiply a base-2**16 significand by multiplier. Extend length as
51 * necessary to accommodate carries.
53 static void
54 __multiply_base_two(_big_float *pbf, unsigned short multiplier)
56 unsigned int p, carry;
57 int j, length = pbf->blength;
59 carry = 0;
60 for (j = 0; j < length; j++) {
61 p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
62 pbf->bsignificand[j] = p & 0xffff;
63 carry = p >> 16;
65 if (carry != 0)
66 pbf->bsignificand[j++] = carry;
67 pbf->blength = j;
71 * Multiply a base-10**4 significand by multiplier. Extend length as
72 * necessary to accommodate carries.
74 static void
75 __multiply_base_ten(_big_float *pbf, unsigned short multiplier)
77 unsigned int p, carry;
78 int j, length = pbf->blength;
80 carry = 0;
81 for (j = 0; j < length; j++) {
82 p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
83 carry = __quorem10000(p, &pbf->bsignificand[j]);
85 while (carry != 0) {
86 carry = __quorem10000(carry, &pbf->bsignificand[j]);
87 j++;
89 pbf->blength = j;
93 * Multiply a base-10**4 significand by 2**multiplier. Extend length
94 * as necessary to accommodate carries.
96 static void
97 __multiply_base_ten_by_two(_big_float *pbf, unsigned short multiplier)
99 unsigned int p, carry;
100 int j, length = pbf->blength;
102 carry = 0;
103 for (j = 0; j < length; j++) {
104 p = ((unsigned int)pbf->bsignificand[j] << multiplier) + carry;
105 carry = __quorem10000(p, &pbf->bsignificand[j]);
107 while (carry != 0) {
108 carry = __quorem10000(carry, &pbf->bsignificand[j]);
109 j++;
111 pbf->blength = j;
115 * Propagate carries in a base-2**16 significand.
117 static void
118 __carry_propagate_two(unsigned int carry, unsigned short *psignificand)
120 unsigned int p;
121 int j;
123 j = 0;
124 while (carry != 0) {
125 p = psignificand[j] + carry;
126 psignificand[j++] = p & 0xffff;
127 carry = p >> 16;
132 * Propagate carries in a base-10**4 significand.
134 static void
135 __carry_propagate_ten(unsigned int carry, unsigned short *psignificand)
137 unsigned int p;
138 int j;
140 j = 0;
141 while (carry != 0) {
142 p = psignificand[j] + carry;
143 carry = __quorem10000(p, &psignificand[j]);
144 j++;
149 * Given x[] and y[], base-2**16 vectors of length n, compute the
150 * dot product
152 * sum (i=0,n-1) of x[i]*y[n-1-i]
154 * The product may fill as many as three base-2**16 digits; product[0]
155 * is the least significant, product[2] the most.
157 static void
158 __multiply_base_two_vector(unsigned short n, unsigned short *px,
159 unsigned short *py, unsigned short *product)
162 unsigned int acc, p;
163 unsigned short carry;
164 int i;
166 acc = 0;
167 carry = 0;
168 for (i = 0; i < (int)n; i++) {
169 p = px[i] * py[n - 1 - i] + acc;
170 if (p < acc)
171 carry++;
172 acc = p;
174 product[0] = acc & 0xffff;
175 product[1] = acc >> 16;
176 product[2] = carry;
180 * Given x[] and y[], base-10**4 vectors of length n, compute the
181 * dot product
183 * sum (i=0,n-1) of x[i]*y[n-1-i]
185 * The product may fill as many as three base-10**4 digits; product[0]
186 * is the least significant, product[2] the most.
188 #define ABASE 3000000000u /* base of accumulator */
190 static void
191 __multiply_base_ten_vector(unsigned short n, unsigned short *px,
192 unsigned short *py, unsigned short *product)
195 unsigned int acc;
196 unsigned short carry;
197 int i;
199 acc = 0;
200 carry = 0;
201 for (i = 0; i < (int)n; i++) {
202 acc = px[i] * py[n - 1 - i] + acc;
203 if (acc >= ABASE) {
204 carry++;
205 acc -= ABASE;
208 product[0] = acc % 10000;
209 acc = acc / 10000;
210 product[1] = acc % 10000;
211 acc = acc / 10000;
212 product[2] = acc + (ABASE / 100000000) * carry;
216 * Multiply *pbf by the n-th power of mult, which must be two or
217 * ten. If mult is two, *pbf is assumed to be base 10**4; if mult
218 * is ten, *pbf is assumed to be base 2**16. precision specifies
219 * the number of significant bits or decimal digits required in the
220 * result. (The product may have more or fewer digits than this,
221 * but it will be accurate to at least this many.)
223 * On exit, if the product is small enough, it overwrites *pbf, and
224 * *pnewbf is set to pbf. If the product is too large to fit in *pbf,
225 * this routine calls malloc(3M) to allocate storage and sets *pnewbf
226 * to point to this area; it is the caller's responsibility to free
227 * this storage when it is no longer needed. Note that *pbf may be
228 * modified even when the routine allocates new storage.
230 * If n is too large, we set errno to ERANGE and call abort(3C).
231 * If an attempted malloc fails, we set errno to ENOMEM and call
232 * abort(3C).
234 void
235 __big_float_times_power(_big_float *pbf, int mult, int n, int precision,
236 _big_float **pnewbf)
238 int base, needed_precision, productsize;
239 int i, j, itlast, trailing_zeros_to_delete;
240 int tablepower[3], length[3];
241 int lengthx, lengthp, istart, istop;
242 int excess_check;
243 unsigned short *pp, *table[3], canquit;
244 unsigned short multiplier, product[3];
246 if (pbf->blength == 0) {
247 *pnewbf = pbf;
248 return;
251 if (mult == 2) {
253 * Handle small n cases that don't require extra
254 * storage quickly.
256 if (n <= 16 && pbf->blength + 2 < pbf->bsize) {
257 __multiply_base_ten_by_two(pbf, n);
258 *pnewbf = pbf;
259 return;
262 /* *pbf is base 10**4 */
263 base = 10;
266 * Convert precision (base ten digits) to needed_precision
267 * (base 10**4 digits), allowing an additional digit at
268 * each end.
270 needed_precision = 2 + (precision >> 2);
273 * Set up pointers to the table entries and compute their
274 * lengths.
276 if (n < __TBL_2_SMALL_SIZE) {
277 itlast = 0;
278 tablepower[0] = n;
279 tablepower[1] = tablepower[2] = 0;
280 } else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE)) {
281 itlast = 1;
282 tablepower[0] = n % __TBL_2_SMALL_SIZE;
283 tablepower[1] = n / __TBL_2_SMALL_SIZE;
284 tablepower[2] = 0;
285 } else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE *
286 __TBL_2_HUGE_SIZE)) {
287 itlast = 2;
288 tablepower[0] = n % __TBL_2_SMALL_SIZE;
289 n /= __TBL_2_SMALL_SIZE;
290 tablepower[1] = n % __TBL_2_BIG_SIZE;
291 tablepower[2] = n / __TBL_2_BIG_SIZE;
292 } else {
293 errno = ERANGE;
294 abort();
296 pp = (unsigned short *)__tbl_2_small_start + tablepower[0];
297 table[0] = (unsigned short *)__tbl_2_small_digits + pp[0];
298 length[0] = pp[1] - pp[0];
299 pp = (unsigned short *)__tbl_2_big_start + tablepower[1];
300 table[1] = (unsigned short *)__tbl_2_big_digits + pp[0];
301 length[1] = pp[1] - pp[0];
302 pp = (unsigned short *)__tbl_2_huge_start + tablepower[2];
303 table[2] = (unsigned short *)__tbl_2_huge_digits + pp[0];
304 length[2] = pp[1] - pp[0];
305 } else {
306 if (n <= 4 && pbf->blength + 1 < pbf->bsize) {
307 pbf->bexponent += (short)n;
308 __multiply_base_two(pbf, __tbl_10_small_digits[n]);
309 *pnewbf = pbf;
310 return;
313 /* *pbf is base 2**16 */
314 base = 2;
315 pbf->bexponent += (short)n; /* now need to multiply by 5**n */
316 needed_precision = 2 + (precision >> 4);
317 if (n < __TBL_10_SMALL_SIZE) {
318 itlast = 0;
319 tablepower[0] = n;
320 tablepower[1] = tablepower[2] = 0;
321 } else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE)) {
322 itlast = 1;
323 tablepower[0] = n % __TBL_10_SMALL_SIZE;
324 tablepower[1] = n / __TBL_10_SMALL_SIZE;
325 tablepower[2] = 0;
326 } else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE *
327 __TBL_10_HUGE_SIZE)) {
328 itlast = 2;
329 tablepower[0] = n % __TBL_10_SMALL_SIZE;
330 n /= __TBL_10_SMALL_SIZE;
331 tablepower[1] = n % __TBL_10_BIG_SIZE;
332 tablepower[2] = n / __TBL_10_BIG_SIZE;
333 } else {
334 errno = ERANGE;
335 abort();
337 pp = (unsigned short *)__tbl_10_small_start + tablepower[0];
338 table[0] = (unsigned short *)__tbl_10_small_digits + pp[0];
339 length[0] = pp[1] - pp[0];
340 pp = (unsigned short *)__tbl_10_big_start + tablepower[1];
341 table[1] = (unsigned short *)__tbl_10_big_digits + pp[0];
342 length[1] = pp[1] - pp[0];
343 pp = (unsigned short *)__tbl_10_huge_start + tablepower[2];
344 table[2] = (unsigned short *)__tbl_10_huge_digits + pp[0];
345 length[2] = pp[1] - pp[0];
348 /* compute an upper bound on the size of the product */
349 productsize = pbf->blength;
350 for (i = 0; i <= itlast; i++)
351 productsize += length[i];
353 if (productsize < needed_precision)
354 needed_precision = productsize;
356 if (productsize <= pbf->bsize) {
357 *pnewbf = pbf;
358 } else {
359 i = sizeof (_big_float) + sizeof (unsigned short) *
360 (productsize - _BIG_FLOAT_SIZE);
361 if ((*pnewbf = malloc(i)) == NULL) {
362 errno = ENOMEM;
363 abort();
365 (void) memcpy((*pnewbf)->bsignificand, pbf->bsignificand,
366 pbf->blength * sizeof (unsigned short));
367 (*pnewbf)->blength = pbf->blength;
368 (*pnewbf)->bexponent = pbf->bexponent;
369 pbf = *pnewbf;
370 pbf->bsize = productsize;
374 * Now pbf points to the input and the output. Step through
375 * each level of the tables.
377 for (i = 0; i <= itlast; i++) {
378 if (tablepower[i] == 0)
379 continue;
381 lengthp = length[i];
382 if (lengthp == 1) {
383 /* short multiplier (<= 10**4 or 2**13) */
384 if (base == 10) {
385 /* left shift by tablepower[i] */
386 __multiply_base_ten_by_two(pbf, tablepower[i]);
387 } else {
388 __multiply_base_two(pbf, (table[i])[0]);
390 continue;
393 lengthx = pbf->blength;
394 if (lengthx == 1) {
395 /* short multiplicand */
396 multiplier = pbf->bsignificand[0];
397 (void) memcpy(pbf->bsignificand, table[i],
398 lengthp * sizeof (unsigned short));
399 pbf->blength = lengthp;
400 if (base == 10)
401 __multiply_base_ten(pbf, multiplier);
402 else
403 __multiply_base_two(pbf, multiplier);
404 continue;
407 /* keep track of trailing zeroes */
408 trailing_zeros_to_delete = 0;
410 /* initialize for carry propagation */
411 pbf->bsignificand[lengthx + lengthp - 1] = 0;
414 * General case - the result will be accumulated in *pbf
415 * from most significant digit to least significant.
417 for (j = lengthx + lengthp - 2; j >= 0; j--) {
418 istart = j - lengthp + 1;
419 if (istart < 0)
420 istart = 0;
422 istop = lengthx - 1;
423 if (istop > j)
424 istop = j;
426 pp = table[i];
427 if (base == 2) {
428 __multiply_base_two_vector(istop - istart + 1,
429 &(pbf->bsignificand[istart]),
430 &(pp[j - istop]), product);
431 if (product[2] != 0)
432 __carry_propagate_two(
433 (unsigned int)product[2],
434 &(pbf->bsignificand[j + 2]));
435 if (product[1] != 0)
436 __carry_propagate_two(
437 (unsigned int)product[1],
438 &(pbf->bsignificand[j + 1]));
439 } else {
440 __multiply_base_ten_vector(istop - istart + 1,
441 &(pbf->bsignificand[istart]),
442 &(pp[j - istop]), product);
443 if (product[2] != 0)
444 __carry_propagate_ten(
445 (unsigned int)product[2],
446 &(pbf->bsignificand[j + 2]));
447 if (product[1] != 0)
448 __carry_propagate_ten(
449 (unsigned int)product[1],
450 &(pbf->bsignificand[j + 1]));
452 pbf->bsignificand[j] = product[0];
453 if (i < itlast || j > lengthx + lengthp - 4
454 - needed_precision)
455 continue;
458 * On the last multiplication, it's not necessary
459 * to develop the entire product if further digits
460 * can't possibly affect significant digits. But
461 * note that further digits can affect the product
462 * in one of two ways: (i) the sum of digits beyond
463 * the significant ones can cause a carry that would
464 * propagate into the significant digits, or (ii) no
465 * carry will occur, but there may be more nonzero
466 * digits that will need to be recorded in a sticky
467 * bit.
469 excess_check = lengthx + lengthp;
470 if (pbf->bsignificand[excess_check - 1] == 0)
471 excess_check--;
472 excess_check -= needed_precision + 4;
473 canquit = ((base == 2)? 65535 : 9999) -
474 ((lengthx < lengthp)? lengthx : lengthp);
476 * If j <= excess_check, then we have all the
477 * significant digits. If the (j + 1)-st digit
478 * is no larger than canquit, then the sum of the
479 * digits not yet computed can't carry into the
480 * significant digits. If the j-th and (j + 1)-st
481 * digits are not both zero, then we know we are
482 * discarding nonzero digits. (If both of these
483 * digits are zero, we need to keep forming more
484 * of the product to see whether or not there are
485 * any more nonzero digits.)
487 if (j <= excess_check &&
488 pbf->bsignificand[j + 1] <= canquit &&
489 (pbf->bsignificand[j + 1] | pbf->bsignificand[j])
490 != 0) {
491 /* can discard j+1, j, ... 0 */
492 trailing_zeros_to_delete = j + 2;
494 /* set sticky bit */
495 pbf->bsignificand[j + 2] |= 1;
496 break;
500 /* if the product didn't carry, delete the leading zero */
501 pbf->blength = lengthx + lengthp;
502 if (pbf->bsignificand[pbf->blength - 1] == 0)
503 pbf->blength--;
505 /* look for additional trailing zeros to delete */
506 for (; pbf->bsignificand[trailing_zeros_to_delete] == 0;
507 trailing_zeros_to_delete++)
508 continue;
510 if (trailing_zeros_to_delete > 0) {
511 for (j = 0; j < (int)pbf->blength -
512 trailing_zeros_to_delete; j++) {
513 pbf->bsignificand[j] = pbf->bsignificand[j
514 + trailing_zeros_to_delete];
516 pbf->blength -= trailing_zeros_to_delete;
517 pbf->bexponent += trailing_zeros_to_delete <<
518 ((base == 2)? 4 : 2);