2 * zmul - faster than usual multiplying and squaring routines
4 * Copyright (C) 1999-2007 David I. Bell
6 * Calc is open software; you can redistribute it and/or modify it under
7 * the terms of the version 2.1 of the GNU Lesser General Public License
8 * as published by the Free Software Foundation.
10 * Calc is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
13 * Public License for more details.
15 * A copy of version 2.1 of the GNU Lesser General Public License is
16 * distributed with calc under the filename COPYING-LGPL. You should have
17 * received a copy with calc; if not, write to Free Software Foundation, Inc.
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * @(#) $Revision: 30.1 $
21 * @(#) $Id: zmul.c,v 30.1 2007/03/16 11:09:46 chongo Exp $
22 * @(#) $Source: /usr/local/src/bin/calc/RCS/zmul.c,v $
24 * Under source code control: 1991/01/09 20:01:31
25 * File existed as early as: 1991
27 * Share and enjoy! :-) http://www.isthe.com/chongo/tech/comp/calc/
31 * Faster than usual multiplying and squaring routines.
32 * The algorithm used is the reasonably simple one from Knuth, volume 2,
33 * section 4.3.3. These recursive routines are of speed O(N^1.585)
34 * instead of O(N^2). The usual multiplication and (almost usual) squaring
35 * algorithms are used for small numbers. On a 386 with its compiler, the
36 * two algorithms are equal in speed at about 100 decimal digits.
44 STATIC HALF
*tempbuf
; /* temporary buffer for multiply and square */
46 S_FUNC LEN
domul(HALF
*v1
, LEN size1
, HALF
*v2
, LEN size2
, HALF
*ans
);
47 S_FUNC LEN
dosquare(HALF
*vp
, LEN size
, HALF
*ans
);
51 * Multiply two numbers using the following formula recursively:
52 * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
53 * where S is a power of 2^16, and so multiplies by it are shifts, and
54 * A,B,C,D are the left and right halfs of the numbers to be multiplied.
57 * z1 numbers to multiply
58 * z2 numbers to multiply
59 * res result of multiplication
62 zmul(ZVALUE z1
, ZVALUE z2
, ZVALUE
*res
)
64 LEN len
; /* size of array */
66 if (ziszero(z1
) || ziszero(z2
)) {
72 res
->sign
= (z1
.sign
!= z2
.sign
);
77 res
->sign
= (z1
.sign
!= z2
.sign
);
82 * Allocate a temporary buffer for the recursion levels to use.
83 * An array needs to be allocated large enough for all of the
84 * temporary results to fit in. This size is about twice the size
85 * of the largest original number, since each recursion level uses
86 * the size of its given number, and whose size is 1/2 the size of
87 * the previous level. The sum of the infinite series is 2.
88 * Add some extra words because of rounding when dividing by 2
89 * and also because of the extra word that each multiply needs.
95 tempbuf
= zalloctemp(len
);
97 res
->sign
= (z1
.sign
!= z2
.sign
);
98 res
->v
= alloc(z1
.len
+ z2
.len
+ 2);
99 res
->len
= domul(z1
.v
, z1
.len
, z2
.v
, z2
.len
, res
->v
);
104 * Recursive routine to multiply two numbers by splitting them up into
105 * two numbers of half the size, and using the results of multiplying the
106 * subpieces. The result is placed in the indicated array, which must be
107 * large enough for the result plus one extra word (size1 + size2 + 1).
108 * Returns the actual size of the result with leading zeroes stripped.
109 * This also uses a temporary array which must be twice as large as
110 * one more than the size of the number at the top level recursive call.
114 * size1 size of first number
116 * size2 size of second number
117 * ans location for result
120 domul(HALF
*v1
, LEN size1
, HALF
*v2
, LEN size2
, HALF
*ans
)
122 LEN shift
; /* amount numbers are shifted by */
123 LEN sizeA
; /* size of left half of first number */
124 LEN sizeB
; /* size of right half of first number */
125 LEN sizeC
; /* size of left half of second number */
126 LEN sizeD
; /* size of right half of second number */
127 LEN sizeAB
; /* size of subtraction of A and B */
128 LEN sizeDC
; /* size of subtraction of D and C */
129 LEN sizeABDC
; /* size of product of above two results */
130 LEN subsize
; /* size of difference of halfs */
131 LEN copysize
; /* size of number left to copy */
132 LEN sizetotal
; /* total size of product */
133 LEN len
; /* temporary length */
134 HALF
*baseA
; /* base of left half of first number */
135 HALF
*baseB
; /* base of right half of first number */
136 HALF
*baseC
; /* base of left half of second number */
137 HALF
*baseD
; /* base of right half of second number */
138 HALF
*baseAB
; /* base of result of subtraction of A and B */
139 HALF
*baseDC
; /* base of result of subtraction of D and C */
140 HALF
*baseABDC
; /* base of product of above two results */
141 HALF
*baseAC
; /* base of product of A and C */
142 HALF
*baseBD
; /* base of product of B and D */
143 FULL carry
; /* carry digit for small multiplications */
144 FULL carryACBD
; /* carry from addition of A*C and B*D */
145 FULL digit
; /* single digit multiplying by */
146 HALF
*temp
; /* base for temporary calculations */
147 BOOL neg
; /* whether imtermediate term is negative */
148 register HALF
*hd
, *h1
=NULL
, *h2
=NULL
; /* for inner loops */
149 SIUNION sival
; /* for addition of digits */
152 * Trim the numbers of leading zeroes and initialize the
153 * estimated size of the result.
156 while ((*hd
== 0) && (size1
> 1)) {
161 while ((*hd
== 0) && (size2
> 1)) {
165 sizetotal
= size1
+ size2
;
168 * First check for zero answer.
170 if (((size1
== 1) && (*v1
== 0)) || ((size2
== 1) && (*v2
== 0))) {
176 * Exchange the two numbers if necessary to make the number of
177 * digits of the first number be greater than or equal to the
181 len
= size1
; size1
= size2
; size2
= len
;
182 hd
= v1
; v1
= v2
; v2
= hd
;
186 * If the smaller number has only a few digits, then calculate
187 * the result in the normal manner in order to avoid the overhead
188 * of the recursion for small numbers. The number of digits where
189 * the algorithm changes is settable from 2 to maxint.
191 if (size2
< conf
->mul2
) {
193 * First clear the top part of the result, and then multiply
194 * by the lowest digit to get the first partial sum. Later
195 * products will then add into this result.
207 while (len
>= 4) { /* expand the loop some */
209 sival
.ivalue
= ((FULL
) *h1
++) * digit
+ carry
;
210 /* ignore Saber-C warning #112 - get ushort from uint */
211 /* ok to ignore on name domul`sival */
213 carry
= sival
.sihigh
;
214 sival
.ivalue
= ((FULL
) *h1
++) * digit
+ carry
;
216 carry
= sival
.sihigh
;
217 sival
.ivalue
= ((FULL
) *h1
++) * digit
+ carry
;
219 carry
= sival
.sihigh
;
220 sival
.ivalue
= ((FULL
) *h1
++) * digit
+ carry
;
222 carry
= sival
.sihigh
;
225 sival
.ivalue
= ((FULL
) *h1
++) * digit
+ carry
;
227 carry
= sival
.sihigh
;
232 * Now multiply by the remaining digits of the second number,
233 * adding each product into the final result.
236 while (--size2
> 0) {
244 while (len
>= 4) { /* expand the loop some */
246 sival
.ivalue
= ((FULL
) *h1
++) * digit
247 + ((FULL
) *hd
) + carry
;
249 carry
= sival
.sihigh
;
250 sival
.ivalue
= ((FULL
) *h1
++) * digit
251 + ((FULL
) *hd
) + carry
;
253 carry
= sival
.sihigh
;
254 sival
.ivalue
= ((FULL
) *h1
++) * digit
255 + ((FULL
) *hd
) + carry
;
257 carry
= sival
.sihigh
;
258 sival
.ivalue
= ((FULL
) *h1
++) * digit
259 + ((FULL
) *hd
) + carry
;
261 carry
= sival
.sihigh
;
264 sival
.ivalue
= ((FULL
) *h1
++) * digit
265 + ((FULL
) *hd
) + carry
;
267 carry
= sival
.sihigh
;
270 sival
.ivalue
= ((FULL
) *hd
) + carry
;
272 carry
= sival
.sihigh
;
277 * Now return the true size of the number.
281 while ((*hd
== 0) && (len
> 1)) {
289 * Need to multiply by a large number.
290 * Allocate temporary space for calculations, and calculate the
291 * value for the shift. The shift value is 1/2 the size of the
292 * larger (first) number (rounded up). The amount of temporary
293 * space needed is twice the size of the shift, plus one more word
294 * for the multiply to use.
296 shift
= (size1
+ 1) / 2;
298 tempbuf
+= (2 * shift
) + 1;
301 * Determine the sizes and locations of all the numbers.
302 * The value of sizeC can be negative, and this is checked later.
303 * The value of sizeD is limited by the full size of the number.
307 baseC
= v2
+ ((shift
<= size2
) ? shift
: size2
);
310 baseDC
= ans
+ shift
;
311 baseAC
= ans
+ shift
* 2;
314 sizeA
= size1
- shift
;
315 sizeC
= size2
- shift
;
318 hd
= &baseB
[shift
- 1];
319 while ((*hd
== 0) && (sizeB
> 1)) {
327 hd
= &baseD
[sizeD
- 1];
328 while ((*hd
== 0) && (sizeD
> 1)) {
334 * If the smaller number has a high half of zero, then calculate
335 * the result by breaking up the first number into two numbers
336 * and combining the results using the obvious formula:
337 * (A*S+B) * D = (A*D)*S + B*D
340 len
= domul(baseB
, sizeB
, baseD
, sizeD
, ans
);
342 len
= sizetotal
- len
;
347 * Add the second number into the first number, shifted
348 * over at the correct position.
350 len
= domul(baseA
, sizeA
, baseD
, sizeD
, temp
);
355 sival
.ivalue
= ((FULL
) *h1
++) + ((FULL
) *hd
) + carry
;
357 carry
= sival
.sihigh
;
360 sival
.ivalue
= ((FULL
) *hd
) + carry
;
362 carry
= sival
.sihigh
;
366 * Determine the final size of the number and return it.
370 while ((*hd
== 0) && (len
> 1)) {
379 * Now we know that the high halfs of the numbers are nonzero,
380 * so we can use the complete formula.
381 * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D.
382 * The steps are done in the following order:
387 * (S^2+S)*A*C + (S+1)*B*D (*)
388 * (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
390 * Note: step (*) above can produce a result which is larger than
391 * the final product will be, and this is where the extra word
392 * needed in the product comes from. After the final subtraction is
393 * done, the result fits in the expected size. Using the extra word
394 * is easier than suppressing the carries and borrows everywhere.
396 * Begin by forming the product (A-B)*(D-C) into a temporary
397 * location that we save until the final step. Do each subtraction
398 * at positions 0 and S. Be very careful about the relative sizes
399 * of the numbers since this result can be negative. For the first
400 * step calculate the absolute difference of A and B into a temporary
401 * location at position 0 of the result. Negate the sign if A is
405 if (sizeA
== sizeB
) {
407 h1
= &baseA
[len
- 1];
408 h2
= &baseB
[len
- 1];
409 while ((len
> 1) && (*h1
== *h2
)) {
415 if ((sizeA
> sizeB
) || ((sizeA
== sizeB
) && h1
&& h2
&& (*h1
> *h2
))) {
427 copysize
= sizeAB
- subsize
;
432 sival
.ivalue
= BASE1
- ((FULL
) *h1
++) + ((FULL
) *h2
++) + carry
;
433 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
434 carry
= sival
.sihigh
;
437 sival
.ivalue
= (BASE1
- ((FULL
) *h1
++)) + carry
;
438 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
439 carry
= sival
.sihigh
;
442 hd
= &baseAB
[sizeAB
- 1];
443 while ((*hd
== 0) && (sizeAB
> 1)) {
449 * This completes the calculation of abs(A-B). For the next step
450 * calculate the absolute difference of D and C into a temporary
451 * location at position S of the result. Negate the sign if C is
454 if (sizeC
== sizeD
) {
456 h1
= &baseC
[len
- 1];
457 h2
= &baseD
[len
- 1];
458 while ((len
> 1) && (*h1
== *h2
)) {
464 if ((sizeC
> sizeD
) || ((sizeC
== sizeD
) && (*h1
> *h2
))) {
476 copysize
= sizeDC
- subsize
;
481 sival
.ivalue
= BASE1
- ((FULL
) *h1
++) + ((FULL
) *h2
++) + carry
;
482 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
483 carry
= sival
.sihigh
;
486 sival
.ivalue
= (BASE1
- ((FULL
) *h1
++)) + carry
;
487 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
488 carry
= sival
.sihigh
;
490 hd
= &baseDC
[sizeDC
- 1];
491 while ((*hd
== 0) && (sizeDC
> 1)) {
497 * This completes the calculation of abs(D-C). Now multiply
498 * together abs(A-B) and abs(D-C) into a temporary location,
499 * which is preserved until the final steps.
502 sizeABDC
= domul(baseAB
, sizeAB
, baseDC
, sizeDC
, baseABDC
);
505 * Now calculate B*D and A*C into one of their two final locations.
506 * Make sure the high order digits of the products are zeroed since
507 * this initializes the final result. Be careful about this zeroing
508 * since the size of the high order words might be smaller than
509 * the shift size. Do B*D first since the multiplies use one more
510 * word than the size of the product. Also zero the final extra
511 * word in the result for possible carries to use.
513 len
= domul(baseB
, sizeB
, baseD
, sizeD
, baseBD
);
515 len
= shift
* 2 - len
;
519 len
= domul(baseA
, sizeA
, baseC
, sizeC
, baseAC
);
521 len
= sizetotal
- shift
* 2 - len
+ 1;
526 * Now add in A*C and B*D into themselves at the other shifted
527 * position that they need. This addition is tricky in order to
528 * make sure that the two additions cannot interfere with each other.
529 * Therefore we first add in the top half of B*D and the lower half
530 * of A*C. The sources and destinations of these two additions
531 * overlap, and so the same answer results from the two additions,
532 * thus only two pointers suffice for both additions. Keep the
533 * final carry from these additions for later use since we cannot
534 * afford to change the top half of A*C yet.
541 sival
.ivalue
= ((FULL
) *h1
) + ((FULL
) *h2
) + carryACBD
;
544 carryACBD
= sival
.sihigh
;
548 * Now add in the bottom half of B*D and the top half of A*C.
549 * These additions are straightforward, except that A*C should
550 * be done first because of possible carries from B*D, and the
551 * top half of A*C might not exist. Add in one of the carries
552 * from the previous addition while we are at it.
557 len
= sizetotal
- 3 * shift
;
559 sival
.ivalue
= ((FULL
) *h1
++) + ((FULL
) *hd
) + carry
;
561 carry
= sival
.sihigh
;
564 sival
.ivalue
= ((FULL
) *hd
) + carry
;
566 carry
= sival
.sihigh
;
574 sival
.ivalue
= ((FULL
) *h1
++) + ((FULL
) *hd
) + carry
;
576 carry
= sival
.sihigh
;
579 sival
.ivalue
= ((FULL
) *hd
) + carry
;
581 carry
= sival
.sihigh
;
585 * Now finally add in the other delayed carry from the
590 sival
.ivalue
= ((FULL
) *hd
) + carryACBD
;
592 carryACBD
= sival
.sihigh
;
596 * Now finally add or subtract (A-B)*(D-C) into the final result at
597 * the correct position (S), according to whether it is positive or
598 * negative. When subtracting, the answer cannot go negative.
606 sival
.ivalue
= BASE1
- ((FULL
) *hd
) +
607 ((FULL
) *h1
++) + carry
;
608 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
609 carry
= sival
.sihigh
;
612 sival
.ivalue
= BASE1
- ((FULL
) *hd
) + carry
;
613 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
614 carry
= sival
.sihigh
;
618 sival
.ivalue
= ((FULL
) *h1
++) + ((FULL
) *hd
) + carry
;
620 carry
= sival
.sihigh
;
623 sival
.ivalue
= ((FULL
) *hd
) + carry
;
625 carry
= sival
.sihigh
;
630 * Finally determine the size of the final result and return that.
634 while ((*hd
== 0) && (len
> 1)) {
644 * Square a number by using the following formula recursively:
645 * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2
646 * where S is a power of 2^16, and so multiplies by it are shifts,
647 * and A and B are the left and right halfs of the number to square.
650 zsquare(ZVALUE z
, ZVALUE
*res
)
664 * Allocate a temporary array if necessary for the recursion to use.
665 * The array needs to be allocated large enough for all of the
666 * temporary results to fit in. This size is about 3 times the
667 * size of the original number, since each recursion level uses 3/2
668 * of the size of its given number, and whose size is 1/2 the size
669 * of the previous level. The sum of the infinite series is 3.
670 * Allocate some extra words for rounding up the sizes.
672 len
= 3 * z
.len
+ 32;
673 tempbuf
= zalloctemp(len
);
676 res
->v
= alloc((z
.len
+2) * 2);
678 * Without the memset below, Purify reports that dosquare()
679 * will read uninitialized memory at the dosquare() line below
682 * uninitialized memory read (see zsquare)
684 * This problem occurs during regression test #622 and may
685 * be duplicated by executing:
688 * 0xffff0000ffffffff00000000ffff0000000000000000ffff^2;
690 memset((char *)res
->v
, 0, ((z
.len
+2) * 2)*sizeof(HALF
));
691 res
->len
= dosquare(z
.v
, z
.len
, res
->v
);
696 * Recursive routine to square a number by splitting it up into two numbers
697 * of half the size, and using the results of squaring the subpieces.
698 * The result is placed in the indicated array, which must be large
699 * enough for the result (size * 2). Returns the size of the result.
700 * This uses a temporary array which must be 3 times as large as the
701 * size of the number at the top level recursive call.
704 * vp value to be squared
705 * size length of value to square
706 * ans location for result
709 dosquare(HALF
*vp
, LEN size
, HALF
*ans
)
711 LEN shift
; /* amount numbers are shifted by */
712 LEN sizeA
; /* size of left half of number to square */
713 LEN sizeB
; /* size of right half of number to square */
714 LEN sizeAA
; /* size of square of left half */
715 LEN sizeBB
; /* size of square of right half */
716 LEN sizeAABB
; /* size of sum of squares of A and B */
717 LEN sizeAB
; /* size of difference of A and B */
718 LEN sizeABAB
; /* size of square of difference of A and B */
719 LEN subsize
; /* size of difference of halfs */
720 LEN copysize
; /* size of number left to copy */
721 LEN sumsize
; /* size of sum */
722 LEN sizetotal
; /* total size of square */
723 LEN len
; /* temporary length */
724 LEN len1
; /* another temporary length */
725 FULL carry
; /* carry digit for small multiplications */
726 FULL digit
; /* single digit multiplying by */
727 HALF
*temp
; /* base for temporary calculations */
728 HALF
*baseA
; /* base of left half of number */
729 HALF
*baseB
; /* base of right half of number */
730 HALF
*baseAA
; /* base of square of left half of number */
731 HALF
*baseBB
; /* base of square of right half of number */
732 HALF
*baseAABB
; /* base of sum of squares of A and B */
733 HALF
*baseAB
; /* base of difference of A and B */
734 HALF
*baseABAB
; /* base of square of difference of A and B */
735 register HALF
*hd
, *h1
, *h2
, *h3
; /* for inner loops */
736 SIUNION sival
; /* for addition of digits */
739 * First trim the number of leading zeroes.
742 while ((*hd
== 0) && (size
> 1)) {
746 sizetotal
= size
+ size
;
749 * If the number has only a small number of digits, then use the
750 * (almost) normal multiplication method. Multiply each halfword
751 * only by those halfwards further on in the number. Missed terms
752 * will then be the same pairs of products repeated, and the squares
753 * of each halfword. The first case is handled by doubling the
754 * result. The second case is handled explicitly. The number of
755 * digits where the algorithm changes is settable from 2 to maxint.
757 if (size
< conf
->sq2
) {
765 for (len
= size
; len
--; hd
+= 2) {
766 digit
= (FULL
) *h2
++;
773 while (len1
>= 4) { /* expand the loop some */
775 sival
.ivalue
= (digit
* ((FULL
) *h3
++))
776 + ((FULL
) *h1
) + carry
;
778 sival
.ivalue
= (digit
* ((FULL
) *h3
++))
779 + ((FULL
) *h1
) + ((FULL
) sival
.sihigh
);
781 sival
.ivalue
= (digit
* ((FULL
) *h3
++))
782 + ((FULL
) *h1
) + ((FULL
) sival
.sihigh
);
784 sival
.ivalue
= (digit
* ((FULL
) *h3
++))
785 + ((FULL
) *h1
) + ((FULL
) sival
.sihigh
);
787 carry
= sival
.sihigh
;
790 sival
.ivalue
= (digit
* ((FULL
) *h3
++))
791 + ((FULL
) *h1
) + carry
;
793 carry
= sival
.sihigh
;
796 sival
.ivalue
= ((FULL
) *h1
) + carry
;
798 carry
= sival
.sihigh
;
803 * Now double the result.
804 * There is no final carry to worry about because we
805 * handle all digits of the result which must fit.
811 digit
= ((FULL
) *hd
);
812 sival
.ivalue
= digit
+ digit
+ carry
;
813 /* ignore Saber-C warning #112 - get ushort from uint */
814 /* ok to ignore on name dosquare`sival */
816 carry
= sival
.sihigh
;
820 * Now add in the squares of each halfword.
827 digit
= ((FULL
) *h3
++);
828 sival
.ivalue
= digit
* digit
+ ((FULL
) *hd
) + carry
;
830 carry
= sival
.sihigh
;
831 sival
.ivalue
= ((FULL
) *hd
) + carry
;
833 carry
= sival
.sihigh
;
836 sival
.ivalue
= ((FULL
) *hd
) + carry
;
838 carry
= sival
.sihigh
;
842 * Finally return the size of the result.
846 while ((*hd
== 0) && (len
> 1)) {
854 * The number to be squared is large.
855 * Allocate temporary space and determine the sizes and
856 * positions of the values to be calculated.
859 tempbuf
+= (3 * (size
+ 1) / 2);
862 sizeB
= size
- sizeA
;
866 baseAA
= &ans
[shift
* 2];
870 baseABAB
= &temp
[shift
];
873 * Trim the second number of leading zeroes.
875 hd
= &baseB
[sizeB
- 1];
876 while ((*hd
== 0) && (sizeB
> 1)) {
882 * Now to proceed to calculate the result using the formula.
883 * (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
884 * The steps are done in the following order:
887 * (S^2+S)*A^2 + (S+1)*B^2
889 * (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
891 * Begin by forming the squares of two the halfs concatenated
892 * together in the final result location. Make sure that the
893 * highest words of the results are zero.
895 sizeBB
= dosquare(baseB
, sizeB
, baseBB
);
896 hd
= &baseBB
[sizeBB
];
897 len
= shift
* 2 - sizeBB
;
901 sizeAA
= dosquare(baseA
, sizeA
, baseAA
);
902 hd
= &baseAA
[sizeAA
];
903 len
= sizetotal
- shift
* 2 - sizeAA
;
908 * Sum the two squares into a temporary location.
910 if (sizeAA
>= sizeBB
) {
921 copysize
= sizeAABB
- sumsize
;
926 sival
.ivalue
= ((FULL
) *h1
++) + ((FULL
) *h2
++) + carry
;
928 carry
= sival
.sihigh
;
931 sival
.ivalue
= ((FULL
) *h1
++) + carry
;
933 carry
= sival
.sihigh
;
941 * Add the sum back into the previously calculated squares
942 * shifted over to the proper location.
949 sival
.ivalue
= ((FULL
) *hd
) + ((FULL
) *h1
++) + carry
;
951 carry
= sival
.sihigh
;
954 /* uninitialized memory read (see zsquare) */
955 sival
.ivalue
= ((FULL
) *hd
) + carry
;
957 carry
= sival
.sihigh
;
961 * Calculate the absolute value of the difference of the two halfs
962 * into a temporary location.
964 if (sizeA
== sizeB
) {
966 h1
= &baseA
[len
- 1];
967 h2
= &baseB
[len
- 1];
968 while ((len
> 1) && (*h1
== *h2
)) {
974 if ((sizeA
> sizeB
) || ((sizeA
== sizeB
) && (*h1
> *h2
))) {
985 copysize
= sizeAB
- subsize
;
990 sival
.ivalue
= BASE1
- ((FULL
) *h1
++) + ((FULL
) *h2
++) + carry
;
991 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
992 carry
= sival
.sihigh
;
995 sival
.ivalue
= (BASE1
- ((FULL
) *h1
++)) + carry
;
996 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
997 carry
= sival
.sihigh
;
1000 hd
= &baseAB
[sizeAB
- 1];
1001 while ((*hd
== 0) && (sizeAB
> 1)) {
1007 * Now square the number into another temporary location,
1008 * and subtract that from the final result.
1010 sizeABAB
= dosquare(baseAB
, sizeAB
, baseABAB
);
1015 while (sizeABAB
--) {
1016 sival
.ivalue
= BASE1
- ((FULL
) *hd
) + ((FULL
) *h1
++) + carry
;
1017 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
1018 carry
= sival
.sihigh
;
1021 sival
.ivalue
= BASE1
- ((FULL
) *hd
) + carry
;
1022 *hd
++ = (HALF
)(BASE1
- sival
.silow
);
1023 carry
= sival
.sihigh
;
1027 * Return the size of the result.
1031 while ((*hd
== 0) && (len
> 1)) {
1041 * Return a pointer to a buffer to be used for holding a temporary number.
1042 * The buffer will be at least as large as the specified number of HALFs,
1043 * and remains valid until the next call to this routine. The buffer cannot
1044 * be freed by the caller. There is only one temporary buffer, and so as to
1045 * avoid possible conflicts this is only used by the lowest level routines
1046 * such as divide, multiply, and square.
1049 * len required number of HALFs in buffer
1055 STATIC LEN buflen
; /* current length of temp buffer */
1056 STATIC HALF
*bufptr
; /* pointer to current temp buffer */
1062 * We need to grow the temporary buffer.
1063 * First free any existing buffer, and then allocate the new one.
1064 * While we are at it, make the new buffer bigger than necessary
1065 * in order to reduce the number of reallocations.
1072 /* don't call alloc() because _math_abort_ may not be set right */
1073 hp
= (HALF
*) malloc((len
+1) * sizeof(HALF
));
1075 math_error("No memory for temp buffer");