drm/i915: Move load time shrinker registration later
[linux/fpc-iii.git] / arch / parisc / math-emu / fmpyfadd.c
blobb067c45c872d4fac64142b1784162ca3154bc78b
1 /*
2 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
4 * Floating-point emulation code
5 * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * BEGIN_DESC
24 * File:
25 * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
27 * Purpose:
28 * Double Floating-point Multiply Fused Add
29 * Double Floating-point Multiply Negate Fused Add
30 * Single Floating-point Multiply Fused Add
31 * Single Floating-point Multiply Negate Fused Add
33 * External Interfaces:
34 * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
35 * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
36 * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
37 * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
39 * Internal Interfaces:
41 * Theory:
42 * <<please update with a overview of the operation of this file>>
44 * END_DESC
48 #include "float.h"
49 #include "sgl_float.h"
50 #include "dbl_float.h"
54 * Double Floating-point Multiply Fused Add
57 int
58 dbl_fmpyfadd(
59 dbl_floating_point *src1ptr,
60 dbl_floating_point *src2ptr,
61 dbl_floating_point *src3ptr,
62 unsigned int *status,
63 dbl_floating_point *dstptr)
65 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
66 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
67 unsigned int rightp1, rightp2, rightp3, rightp4;
68 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
69 register int mpy_exponent, add_exponent, count;
70 boolean inexact = FALSE, is_tiny = FALSE;
72 unsigned int signlessleft1, signlessright1, save;
73 register int result_exponent, diff_exponent;
74 int sign_save, jumpsize;
76 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
77 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
78 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
80 /*
81 * set sign bit of result of multiply
83 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
84 Dbl_setnegativezerop1(resultp1);
85 else Dbl_setzerop1(resultp1);
88 * Generate multiply exponent
90 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
93 * check first operand for NaN's or infinity
95 if (Dbl_isinfinity_exponent(opnd1p1)) {
96 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
97 if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
98 Dbl_isnotnan(opnd3p1,opnd3p2)) {
99 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
101 * invalid since operands are infinity
102 * and zero
104 if (Is_invalidtrap_enabled())
105 return(OPC_2E_INVALIDEXCEPTION);
106 Set_invalidflag();
107 Dbl_makequietnan(resultp1,resultp2);
108 Dbl_copytoptr(resultp1,resultp2,dstptr);
109 return(NOEXCEPTION);
112 * Check third operand for infinity with a
113 * sign opposite of the multiply result
115 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
116 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
118 * invalid since attempting a magnitude
119 * subtraction of infinities
121 if (Is_invalidtrap_enabled())
122 return(OPC_2E_INVALIDEXCEPTION);
123 Set_invalidflag();
124 Dbl_makequietnan(resultp1,resultp2);
125 Dbl_copytoptr(resultp1,resultp2,dstptr);
126 return(NOEXCEPTION);
130 * return infinity
132 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
133 Dbl_copytoptr(resultp1,resultp2,dstptr);
134 return(NOEXCEPTION);
137 else {
139 * is NaN; signaling or quiet?
141 if (Dbl_isone_signaling(opnd1p1)) {
142 /* trap if INVALIDTRAP enabled */
143 if (Is_invalidtrap_enabled())
144 return(OPC_2E_INVALIDEXCEPTION);
145 /* make NaN quiet */
146 Set_invalidflag();
147 Dbl_set_quiet(opnd1p1);
150 * is second operand a signaling NaN?
152 else if (Dbl_is_signalingnan(opnd2p1)) {
153 /* trap if INVALIDTRAP enabled */
154 if (Is_invalidtrap_enabled())
155 return(OPC_2E_INVALIDEXCEPTION);
156 /* make NaN quiet */
157 Set_invalidflag();
158 Dbl_set_quiet(opnd2p1);
159 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
160 return(NOEXCEPTION);
163 * is third operand a signaling NaN?
165 else if (Dbl_is_signalingnan(opnd3p1)) {
166 /* trap if INVALIDTRAP enabled */
167 if (Is_invalidtrap_enabled())
168 return(OPC_2E_INVALIDEXCEPTION);
169 /* make NaN quiet */
170 Set_invalidflag();
171 Dbl_set_quiet(opnd3p1);
172 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
173 return(NOEXCEPTION);
176 * return quiet NaN
178 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
179 return(NOEXCEPTION);
184 * check second operand for NaN's or infinity
186 if (Dbl_isinfinity_exponent(opnd2p1)) {
187 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
188 if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
189 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
191 * invalid since multiply operands are
192 * zero & infinity
194 if (Is_invalidtrap_enabled())
195 return(OPC_2E_INVALIDEXCEPTION);
196 Set_invalidflag();
197 Dbl_makequietnan(opnd2p1,opnd2p2);
198 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
199 return(NOEXCEPTION);
203 * Check third operand for infinity with a
204 * sign opposite of the multiply result
206 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
207 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
209 * invalid since attempting a magnitude
210 * subtraction of infinities
212 if (Is_invalidtrap_enabled())
213 return(OPC_2E_INVALIDEXCEPTION);
214 Set_invalidflag();
215 Dbl_makequietnan(resultp1,resultp2);
216 Dbl_copytoptr(resultp1,resultp2,dstptr);
217 return(NOEXCEPTION);
221 * return infinity
223 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
224 Dbl_copytoptr(resultp1,resultp2,dstptr);
225 return(NOEXCEPTION);
228 else {
230 * is NaN; signaling or quiet?
232 if (Dbl_isone_signaling(opnd2p1)) {
233 /* trap if INVALIDTRAP enabled */
234 if (Is_invalidtrap_enabled())
235 return(OPC_2E_INVALIDEXCEPTION);
236 /* make NaN quiet */
237 Set_invalidflag();
238 Dbl_set_quiet(opnd2p1);
241 * is third operand a signaling NaN?
243 else if (Dbl_is_signalingnan(opnd3p1)) {
244 /* trap if INVALIDTRAP enabled */
245 if (Is_invalidtrap_enabled())
246 return(OPC_2E_INVALIDEXCEPTION);
247 /* make NaN quiet */
248 Set_invalidflag();
249 Dbl_set_quiet(opnd3p1);
250 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
251 return(NOEXCEPTION);
254 * return quiet NaN
256 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
257 return(NOEXCEPTION);
262 * check third operand for NaN's or infinity
264 if (Dbl_isinfinity_exponent(opnd3p1)) {
265 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
266 /* return infinity */
267 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
268 return(NOEXCEPTION);
269 } else {
271 * is NaN; signaling or quiet?
273 if (Dbl_isone_signaling(opnd3p1)) {
274 /* trap if INVALIDTRAP enabled */
275 if (Is_invalidtrap_enabled())
276 return(OPC_2E_INVALIDEXCEPTION);
277 /* make NaN quiet */
278 Set_invalidflag();
279 Dbl_set_quiet(opnd3p1);
282 * return quiet NaN
284 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
285 return(NOEXCEPTION);
290 * Generate multiply mantissa
292 if (Dbl_isnotzero_exponent(opnd1p1)) {
293 /* set hidden bit */
294 Dbl_clear_signexponent_set_hidden(opnd1p1);
296 else {
297 /* check for zero */
298 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
300 * Perform the add opnd3 with zero here.
302 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
303 if (Is_rounding_mode(ROUNDMINUS)) {
304 Dbl_or_signs(opnd3p1,resultp1);
305 } else {
306 Dbl_and_signs(opnd3p1,resultp1);
310 * Now let's check for trapped underflow case.
312 else if (Dbl_iszero_exponent(opnd3p1) &&
313 Is_underflowtrap_enabled()) {
314 /* need to normalize results mantissa */
315 sign_save = Dbl_signextendedsign(opnd3p1);
316 result_exponent = 0;
317 Dbl_leftshiftby1(opnd3p1,opnd3p2);
318 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
319 Dbl_set_sign(opnd3p1,/*using*/sign_save);
320 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
321 unfl);
322 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
323 /* inexact = FALSE */
324 return(OPC_2E_UNDERFLOWEXCEPTION);
326 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
327 return(NOEXCEPTION);
329 /* is denormalized, adjust exponent */
330 Dbl_clear_signexponent(opnd1p1);
331 Dbl_leftshiftby1(opnd1p1,opnd1p2);
332 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
334 /* opnd2 needs to have hidden bit set with msb in hidden bit */
335 if (Dbl_isnotzero_exponent(opnd2p1)) {
336 Dbl_clear_signexponent_set_hidden(opnd2p1);
338 else {
339 /* check for zero */
340 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
342 * Perform the add opnd3 with zero here.
344 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
345 if (Is_rounding_mode(ROUNDMINUS)) {
346 Dbl_or_signs(opnd3p1,resultp1);
347 } else {
348 Dbl_and_signs(opnd3p1,resultp1);
352 * Now let's check for trapped underflow case.
354 else if (Dbl_iszero_exponent(opnd3p1) &&
355 Is_underflowtrap_enabled()) {
356 /* need to normalize results mantissa */
357 sign_save = Dbl_signextendedsign(opnd3p1);
358 result_exponent = 0;
359 Dbl_leftshiftby1(opnd3p1,opnd3p2);
360 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
361 Dbl_set_sign(opnd3p1,/*using*/sign_save);
362 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
363 unfl);
364 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
365 /* inexact = FALSE */
366 return(OPC_2E_UNDERFLOWEXCEPTION);
368 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
369 return(NOEXCEPTION);
371 /* is denormalized; want to normalize */
372 Dbl_clear_signexponent(opnd2p1);
373 Dbl_leftshiftby1(opnd2p1,opnd2p2);
374 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
377 /* Multiply the first two source mantissas together */
380 * The intermediate result will be kept in tmpres,
381 * which needs enough room for 106 bits of mantissa,
382 * so lets call it a Double extended.
384 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
387 * Four bits at a time are inspected in each loop, and a
388 * simple shift and add multiply algorithm is used.
390 for (count = DBL_P-1; count >= 0; count -= 4) {
391 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
392 if (Dbit28p2(opnd1p2)) {
393 /* Fourword_add should be an ADD followed by 3 ADDC's */
394 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
395 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
397 if (Dbit29p2(opnd1p2)) {
398 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
399 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
401 if (Dbit30p2(opnd1p2)) {
402 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
403 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
405 if (Dbit31p2(opnd1p2)) {
406 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
407 opnd2p1, opnd2p2, 0, 0);
409 Dbl_rightshiftby4(opnd1p1,opnd1p2);
411 if (Is_dexthiddenoverflow(tmpresp1)) {
412 /* result mantissa >= 2 (mantissa overflow) */
413 mpy_exponent++;
414 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
418 * Restore the sign of the mpy result which was saved in resultp1.
419 * The exponent will continue to be kept in mpy_exponent.
421 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
424 * No rounding is required, since the result of the multiply
425 * is exact in the extended format.
429 * Now we are ready to perform the add portion of the operation.
431 * The exponents need to be kept as integers for now, since the
432 * multiply result might not fit into the exponent field. We
433 * can't overflow or underflow because of this yet, since the
434 * add could bring the final result back into range.
436 add_exponent = Dbl_exponent(opnd3p1);
439 * Check for denormalized or zero add operand.
441 if (add_exponent == 0) {
442 /* check for zero */
443 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
444 /* right is zero */
445 /* Left can't be zero and must be result.
447 * The final result is now in tmpres and mpy_exponent,
448 * and needs to be rounded and squeezed back into
449 * double precision format from double extended.
451 result_exponent = mpy_exponent;
452 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
453 resultp1,resultp2,resultp3,resultp4);
454 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
455 goto round;
459 * Neither are zeroes.
460 * Adjust exponent and normalize add operand.
462 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
463 Dbl_clear_signexponent(opnd3p1);
464 Dbl_leftshiftby1(opnd3p1,opnd3p2);
465 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
466 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
467 } else {
468 Dbl_clear_exponent_set_hidden(opnd3p1);
471 * Copy opnd3 to the double extended variable called right.
473 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
476 * A zero "save" helps discover equal operands (for later),
477 * and is used in swapping operands (if needed).
479 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
482 * Compare magnitude of operands.
484 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
485 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
486 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
487 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
489 * Set the left operand to the larger one by XOR swap.
490 * First finish the first word "save".
492 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
493 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
494 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
495 rightp2,rightp3,rightp4);
496 /* also setup exponents used in rest of routine */
497 diff_exponent = add_exponent - mpy_exponent;
498 result_exponent = add_exponent;
499 } else {
500 /* also setup exponents used in rest of routine */
501 diff_exponent = mpy_exponent - add_exponent;
502 result_exponent = mpy_exponent;
504 /* Invariant: left is not smaller than right. */
507 * Special case alignment of operands that would force alignment
508 * beyond the extent of the extension. A further optimization
509 * could special case this but only reduces the path length for
510 * this infrequent case.
512 if (diff_exponent > DBLEXT_THRESHOLD) {
513 diff_exponent = DBLEXT_THRESHOLD;
516 /* Align right operand by shifting it to the right */
517 Dblext_clear_sign(rightp1);
518 Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
519 /*shifted by*/diff_exponent);
521 /* Treat sum and difference of the operands separately. */
522 if ((int)save < 0) {
524 * Difference of the two operands. Overflow can occur if the
525 * multiply overflowed. A borrow can occur out of the hidden
526 * bit and force a post normalization phase.
528 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
529 rightp1,rightp2,rightp3,rightp4,
530 resultp1,resultp2,resultp3,resultp4);
531 sign_save = Dbl_signextendedsign(resultp1);
532 if (Dbl_iszero_hidden(resultp1)) {
533 /* Handle normalization */
534 /* A straightforward algorithm would now shift the
535 * result and extension left until the hidden bit
536 * becomes one. Not all of the extension bits need
537 * participate in the shift. Only the two most
538 * significant bits (round and guard) are needed.
539 * If only a single shift is needed then the guard
540 * bit becomes a significant low order bit and the
541 * extension must participate in the rounding.
542 * If more than a single shift is needed, then all
543 * bits to the right of the guard bit are zeros,
544 * and the guard bit may or may not be zero. */
545 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
546 resultp4);
548 /* Need to check for a zero result. The sign and
549 * exponent fields have already been zeroed. The more
550 * efficient test of the full object can be used.
552 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
553 /* Must have been "x-x" or "x+(-x)". */
554 if (Is_rounding_mode(ROUNDMINUS))
555 Dbl_setone_sign(resultp1);
556 Dbl_copytoptr(resultp1,resultp2,dstptr);
557 return(NOEXCEPTION);
559 result_exponent--;
561 /* Look to see if normalization is finished. */
562 if (Dbl_isone_hidden(resultp1)) {
563 /* No further normalization is needed */
564 goto round;
567 /* Discover first one bit to determine shift amount.
568 * Use a modified binary search. We have already
569 * shifted the result one position right and still
570 * not found a one so the remainder of the extension
571 * must be zero and simplifies rounding. */
572 /* Scan bytes */
573 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
574 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
575 result_exponent -= 8;
577 /* Now narrow it down to the nibble */
578 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
579 /* The lower nibble contains the
580 * normalizing one */
581 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
582 result_exponent -= 4;
584 /* Select case where first bit is set (already
585 * normalized) otherwise select the proper shift. */
586 jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
587 if (jumpsize <= 7) switch(jumpsize) {
588 case 1:
589 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
590 resultp4);
591 result_exponent -= 3;
592 break;
593 case 2:
594 case 3:
595 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
596 resultp4);
597 result_exponent -= 2;
598 break;
599 case 4:
600 case 5:
601 case 6:
602 case 7:
603 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
604 resultp4);
605 result_exponent -= 1;
606 break;
608 } /* end if (hidden...)... */
609 /* Fall through and round */
610 } /* end if (save < 0)... */
611 else {
612 /* Add magnitudes */
613 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
614 rightp1,rightp2,rightp3,rightp4,
615 /*to*/resultp1,resultp2,resultp3,resultp4);
616 sign_save = Dbl_signextendedsign(resultp1);
617 if (Dbl_isone_hiddenoverflow(resultp1)) {
618 /* Prenormalization required. */
619 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
620 resultp4);
621 result_exponent++;
622 } /* end if hiddenoverflow... */
623 } /* end else ...add magnitudes... */
625 /* Round the result. If the extension and lower two words are
626 * all zeros, then the result is exact. Otherwise round in the
627 * correct direction. Underflow is possible. If a postnormalization
628 * is necessary, then the mantissa is all zeros so no shift is needed.
630 round:
631 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
632 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
633 result_exponent,is_tiny);
635 Dbl_set_sign(resultp1,/*using*/sign_save);
636 if (Dblext_isnotzero_mantissap3(resultp3) ||
637 Dblext_isnotzero_mantissap4(resultp4)) {
638 inexact = TRUE;
639 switch(Rounding_mode()) {
640 case ROUNDNEAREST: /* The default. */
641 if (Dblext_isone_highp3(resultp3)) {
642 /* at least 1/2 ulp */
643 if (Dblext_isnotzero_low31p3(resultp3) ||
644 Dblext_isnotzero_mantissap4(resultp4) ||
645 Dblext_isone_lowp2(resultp2)) {
646 /* either exactly half way and odd or
647 * more than 1/2ulp */
648 Dbl_increment(resultp1,resultp2);
651 break;
653 case ROUNDPLUS:
654 if (Dbl_iszero_sign(resultp1)) {
655 /* Round up positive results */
656 Dbl_increment(resultp1,resultp2);
658 break;
660 case ROUNDMINUS:
661 if (Dbl_isone_sign(resultp1)) {
662 /* Round down negative results */
663 Dbl_increment(resultp1,resultp2);
666 case ROUNDZERO:;
667 /* truncate is simple */
668 } /* end switch... */
669 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
671 if (result_exponent >= DBL_INFINITY_EXPONENT) {
672 /* trap if OVERFLOWTRAP enabled */
673 if (Is_overflowtrap_enabled()) {
675 * Adjust bias of result
677 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
678 Dbl_copytoptr(resultp1,resultp2,dstptr);
679 if (inexact)
680 if (Is_inexacttrap_enabled())
681 return (OPC_2E_OVERFLOWEXCEPTION |
682 OPC_2E_INEXACTEXCEPTION);
683 else Set_inexactflag();
684 return (OPC_2E_OVERFLOWEXCEPTION);
686 inexact = TRUE;
687 Set_overflowflag();
688 /* set result to infinity or largest number */
689 Dbl_setoverflow(resultp1,resultp2);
691 } else if (result_exponent <= 0) { /* underflow case */
692 if (Is_underflowtrap_enabled()) {
694 * Adjust bias of result
696 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
697 Dbl_copytoptr(resultp1,resultp2,dstptr);
698 if (inexact)
699 if (Is_inexacttrap_enabled())
700 return (OPC_2E_UNDERFLOWEXCEPTION |
701 OPC_2E_INEXACTEXCEPTION);
702 else Set_inexactflag();
703 return(OPC_2E_UNDERFLOWEXCEPTION);
705 else if (inexact && is_tiny) Set_underflowflag();
707 else Dbl_set_exponent(resultp1,result_exponent);
708 Dbl_copytoptr(resultp1,resultp2,dstptr);
709 if (inexact)
710 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
711 else Set_inexactflag();
712 return(NOEXCEPTION);
716 * Double Floating-point Multiply Negate Fused Add
719 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
721 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
722 unsigned int *status;
724 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
725 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
726 unsigned int rightp1, rightp2, rightp3, rightp4;
727 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
728 register int mpy_exponent, add_exponent, count;
729 boolean inexact = FALSE, is_tiny = FALSE;
731 unsigned int signlessleft1, signlessright1, save;
732 register int result_exponent, diff_exponent;
733 int sign_save, jumpsize;
735 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
736 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
737 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
740 * set sign bit of result of multiply
742 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
743 Dbl_setzerop1(resultp1);
744 else
745 Dbl_setnegativezerop1(resultp1);
748 * Generate multiply exponent
750 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
753 * check first operand for NaN's or infinity
755 if (Dbl_isinfinity_exponent(opnd1p1)) {
756 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
757 if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
758 Dbl_isnotnan(opnd3p1,opnd3p2)) {
759 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
761 * invalid since operands are infinity
762 * and zero
764 if (Is_invalidtrap_enabled())
765 return(OPC_2E_INVALIDEXCEPTION);
766 Set_invalidflag();
767 Dbl_makequietnan(resultp1,resultp2);
768 Dbl_copytoptr(resultp1,resultp2,dstptr);
769 return(NOEXCEPTION);
772 * Check third operand for infinity with a
773 * sign opposite of the multiply result
775 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
776 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
778 * invalid since attempting a magnitude
779 * subtraction of infinities
781 if (Is_invalidtrap_enabled())
782 return(OPC_2E_INVALIDEXCEPTION);
783 Set_invalidflag();
784 Dbl_makequietnan(resultp1,resultp2);
785 Dbl_copytoptr(resultp1,resultp2,dstptr);
786 return(NOEXCEPTION);
790 * return infinity
792 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
793 Dbl_copytoptr(resultp1,resultp2,dstptr);
794 return(NOEXCEPTION);
797 else {
799 * is NaN; signaling or quiet?
801 if (Dbl_isone_signaling(opnd1p1)) {
802 /* trap if INVALIDTRAP enabled */
803 if (Is_invalidtrap_enabled())
804 return(OPC_2E_INVALIDEXCEPTION);
805 /* make NaN quiet */
806 Set_invalidflag();
807 Dbl_set_quiet(opnd1p1);
810 * is second operand a signaling NaN?
812 else if (Dbl_is_signalingnan(opnd2p1)) {
813 /* trap if INVALIDTRAP enabled */
814 if (Is_invalidtrap_enabled())
815 return(OPC_2E_INVALIDEXCEPTION);
816 /* make NaN quiet */
817 Set_invalidflag();
818 Dbl_set_quiet(opnd2p1);
819 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
820 return(NOEXCEPTION);
823 * is third operand a signaling NaN?
825 else if (Dbl_is_signalingnan(opnd3p1)) {
826 /* trap if INVALIDTRAP enabled */
827 if (Is_invalidtrap_enabled())
828 return(OPC_2E_INVALIDEXCEPTION);
829 /* make NaN quiet */
830 Set_invalidflag();
831 Dbl_set_quiet(opnd3p1);
832 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
833 return(NOEXCEPTION);
836 * return quiet NaN
838 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
839 return(NOEXCEPTION);
844 * check second operand for NaN's or infinity
846 if (Dbl_isinfinity_exponent(opnd2p1)) {
847 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
848 if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
849 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
851 * invalid since multiply operands are
852 * zero & infinity
854 if (Is_invalidtrap_enabled())
855 return(OPC_2E_INVALIDEXCEPTION);
856 Set_invalidflag();
857 Dbl_makequietnan(opnd2p1,opnd2p2);
858 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
859 return(NOEXCEPTION);
863 * Check third operand for infinity with a
864 * sign opposite of the multiply result
866 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
867 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
869 * invalid since attempting a magnitude
870 * subtraction of infinities
872 if (Is_invalidtrap_enabled())
873 return(OPC_2E_INVALIDEXCEPTION);
874 Set_invalidflag();
875 Dbl_makequietnan(resultp1,resultp2);
876 Dbl_copytoptr(resultp1,resultp2,dstptr);
877 return(NOEXCEPTION);
881 * return infinity
883 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
884 Dbl_copytoptr(resultp1,resultp2,dstptr);
885 return(NOEXCEPTION);
888 else {
890 * is NaN; signaling or quiet?
892 if (Dbl_isone_signaling(opnd2p1)) {
893 /* trap if INVALIDTRAP enabled */
894 if (Is_invalidtrap_enabled())
895 return(OPC_2E_INVALIDEXCEPTION);
896 /* make NaN quiet */
897 Set_invalidflag();
898 Dbl_set_quiet(opnd2p1);
901 * is third operand a signaling NaN?
903 else if (Dbl_is_signalingnan(opnd3p1)) {
904 /* trap if INVALIDTRAP enabled */
905 if (Is_invalidtrap_enabled())
906 return(OPC_2E_INVALIDEXCEPTION);
907 /* make NaN quiet */
908 Set_invalidflag();
909 Dbl_set_quiet(opnd3p1);
910 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
911 return(NOEXCEPTION);
914 * return quiet NaN
916 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
917 return(NOEXCEPTION);
922 * check third operand for NaN's or infinity
924 if (Dbl_isinfinity_exponent(opnd3p1)) {
925 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
926 /* return infinity */
927 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
928 return(NOEXCEPTION);
929 } else {
931 * is NaN; signaling or quiet?
933 if (Dbl_isone_signaling(opnd3p1)) {
934 /* trap if INVALIDTRAP enabled */
935 if (Is_invalidtrap_enabled())
936 return(OPC_2E_INVALIDEXCEPTION);
937 /* make NaN quiet */
938 Set_invalidflag();
939 Dbl_set_quiet(opnd3p1);
942 * return quiet NaN
944 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
945 return(NOEXCEPTION);
950 * Generate multiply mantissa
952 if (Dbl_isnotzero_exponent(opnd1p1)) {
953 /* set hidden bit */
954 Dbl_clear_signexponent_set_hidden(opnd1p1);
956 else {
957 /* check for zero */
958 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
960 * Perform the add opnd3 with zero here.
962 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
963 if (Is_rounding_mode(ROUNDMINUS)) {
964 Dbl_or_signs(opnd3p1,resultp1);
965 } else {
966 Dbl_and_signs(opnd3p1,resultp1);
970 * Now let's check for trapped underflow case.
972 else if (Dbl_iszero_exponent(opnd3p1) &&
973 Is_underflowtrap_enabled()) {
974 /* need to normalize results mantissa */
975 sign_save = Dbl_signextendedsign(opnd3p1);
976 result_exponent = 0;
977 Dbl_leftshiftby1(opnd3p1,opnd3p2);
978 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
979 Dbl_set_sign(opnd3p1,/*using*/sign_save);
980 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
981 unfl);
982 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
983 /* inexact = FALSE */
984 return(OPC_2E_UNDERFLOWEXCEPTION);
986 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
987 return(NOEXCEPTION);
989 /* is denormalized, adjust exponent */
990 Dbl_clear_signexponent(opnd1p1);
991 Dbl_leftshiftby1(opnd1p1,opnd1p2);
992 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
994 /* opnd2 needs to have hidden bit set with msb in hidden bit */
995 if (Dbl_isnotzero_exponent(opnd2p1)) {
996 Dbl_clear_signexponent_set_hidden(opnd2p1);
998 else {
999 /* check for zero */
1000 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1002 * Perform the add opnd3 with zero here.
1004 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005 if (Is_rounding_mode(ROUNDMINUS)) {
1006 Dbl_or_signs(opnd3p1,resultp1);
1007 } else {
1008 Dbl_and_signs(opnd3p1,resultp1);
1012 * Now let's check for trapped underflow case.
1014 else if (Dbl_iszero_exponent(opnd3p1) &&
1015 Is_underflowtrap_enabled()) {
1016 /* need to normalize results mantissa */
1017 sign_save = Dbl_signextendedsign(opnd3p1);
1018 result_exponent = 0;
1019 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021 Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023 unfl);
1024 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025 /* inexact = FALSE */
1026 return(OPC_2E_UNDERFLOWEXCEPTION);
1028 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029 return(NOEXCEPTION);
1031 /* is denormalized; want to normalize */
1032 Dbl_clear_signexponent(opnd2p1);
1033 Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1037 /* Multiply the first two source mantissas together */
1040 * The intermediate result will be kept in tmpres,
1041 * which needs enough room for 106 bits of mantissa,
1042 * so lets call it a Double extended.
1044 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1047 * Four bits at a time are inspected in each loop, and a
1048 * simple shift and add multiply algorithm is used.
1050 for (count = DBL_P-1; count >= 0; count -= 4) {
1051 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052 if (Dbit28p2(opnd1p2)) {
1053 /* Fourword_add should be an ADD followed by 3 ADDC's */
1054 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1055 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1057 if (Dbit29p2(opnd1p2)) {
1058 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1061 if (Dbit30p2(opnd1p2)) {
1062 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1065 if (Dbit31p2(opnd1p2)) {
1066 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067 opnd2p1, opnd2p2, 0, 0);
1069 Dbl_rightshiftby4(opnd1p1,opnd1p2);
1071 if (Is_dexthiddenoverflow(tmpresp1)) {
1072 /* result mantissa >= 2 (mantissa overflow) */
1073 mpy_exponent++;
1074 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1078 * Restore the sign of the mpy result which was saved in resultp1.
1079 * The exponent will continue to be kept in mpy_exponent.
1081 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1084 * No rounding is required, since the result of the multiply
1085 * is exact in the extended format.
1089 * Now we are ready to perform the add portion of the operation.
1091 * The exponents need to be kept as integers for now, since the
1092 * multiply result might not fit into the exponent field. We
1093 * can't overflow or underflow because of this yet, since the
1094 * add could bring the final result back into range.
1096 add_exponent = Dbl_exponent(opnd3p1);
1099 * Check for denormalized or zero add operand.
1101 if (add_exponent == 0) {
1102 /* check for zero */
1103 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104 /* right is zero */
1105 /* Left can't be zero and must be result.
1107 * The final result is now in tmpres and mpy_exponent,
1108 * and needs to be rounded and squeezed back into
1109 * double precision format from double extended.
1111 result_exponent = mpy_exponent;
1112 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113 resultp1,resultp2,resultp3,resultp4);
1114 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115 goto round;
1119 * Neither are zeroes.
1120 * Adjust exponent and normalize add operand.
1122 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
1123 Dbl_clear_signexponent(opnd3p1);
1124 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
1127 } else {
1128 Dbl_clear_exponent_set_hidden(opnd3p1);
1131 * Copy opnd3 to the double extended variable called right.
1133 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1136 * A zero "save" helps discover equal operands (for later),
1137 * and is used in swapping operands (if needed).
1139 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1142 * Compare magnitude of operands.
1144 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1149 * Set the left operand to the larger one by XOR swap.
1150 * First finish the first word "save".
1152 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155 rightp2,rightp3,rightp4);
1156 /* also setup exponents used in rest of routine */
1157 diff_exponent = add_exponent - mpy_exponent;
1158 result_exponent = add_exponent;
1159 } else {
1160 /* also setup exponents used in rest of routine */
1161 diff_exponent = mpy_exponent - add_exponent;
1162 result_exponent = mpy_exponent;
1164 /* Invariant: left is not smaller than right. */
1167 * Special case alignment of operands that would force alignment
1168 * beyond the extent of the extension. A further optimization
1169 * could special case this but only reduces the path length for
1170 * this infrequent case.
1172 if (diff_exponent > DBLEXT_THRESHOLD) {
1173 diff_exponent = DBLEXT_THRESHOLD;
1176 /* Align right operand by shifting it to the right */
1177 Dblext_clear_sign(rightp1);
1178 Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179 /*shifted by*/diff_exponent);
1181 /* Treat sum and difference of the operands separately. */
1182 if ((int)save < 0) {
1184 * Difference of the two operands. Overflow can occur if the
1185 * multiply overflowed. A borrow can occur out of the hidden
1186 * bit and force a post normalization phase.
1188 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189 rightp1,rightp2,rightp3,rightp4,
1190 resultp1,resultp2,resultp3,resultp4);
1191 sign_save = Dbl_signextendedsign(resultp1);
1192 if (Dbl_iszero_hidden(resultp1)) {
1193 /* Handle normalization */
1194 /* A straightforward algorithm would now shift the
1195 * result and extension left until the hidden bit
1196 * becomes one. Not all of the extension bits need
1197 * participate in the shift. Only the two most
1198 * significant bits (round and guard) are needed.
1199 * If only a single shift is needed then the guard
1200 * bit becomes a significant low order bit and the
1201 * extension must participate in the rounding.
1202 * If more than a single shift is needed, then all
1203 * bits to the right of the guard bit are zeros,
1204 * and the guard bit may or may not be zero. */
1205 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206 resultp4);
1208 /* Need to check for a zero result. The sign and
1209 * exponent fields have already been zeroed. The more
1210 * efficient test of the full object can be used.
1212 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213 /* Must have been "x-x" or "x+(-x)". */
1214 if (Is_rounding_mode(ROUNDMINUS))
1215 Dbl_setone_sign(resultp1);
1216 Dbl_copytoptr(resultp1,resultp2,dstptr);
1217 return(NOEXCEPTION);
1219 result_exponent--;
1221 /* Look to see if normalization is finished. */
1222 if (Dbl_isone_hidden(resultp1)) {
1223 /* No further normalization is needed */
1224 goto round;
1227 /* Discover first one bit to determine shift amount.
1228 * Use a modified binary search. We have already
1229 * shifted the result one position right and still
1230 * not found a one so the remainder of the extension
1231 * must be zero and simplifies rounding. */
1232 /* Scan bytes */
1233 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235 result_exponent -= 8;
1237 /* Now narrow it down to the nibble */
1238 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239 /* The lower nibble contains the
1240 * normalizing one */
1241 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242 result_exponent -= 4;
1244 /* Select case where first bit is set (already
1245 * normalized) otherwise select the proper shift. */
1246 jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247 if (jumpsize <= 7) switch(jumpsize) {
1248 case 1:
1249 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250 resultp4);
1251 result_exponent -= 3;
1252 break;
1253 case 2:
1254 case 3:
1255 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256 resultp4);
1257 result_exponent -= 2;
1258 break;
1259 case 4:
1260 case 5:
1261 case 6:
1262 case 7:
1263 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264 resultp4);
1265 result_exponent -= 1;
1266 break;
1268 } /* end if (hidden...)... */
1269 /* Fall through and round */
1270 } /* end if (save < 0)... */
1271 else {
1272 /* Add magnitudes */
1273 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274 rightp1,rightp2,rightp3,rightp4,
1275 /*to*/resultp1,resultp2,resultp3,resultp4);
1276 sign_save = Dbl_signextendedsign(resultp1);
1277 if (Dbl_isone_hiddenoverflow(resultp1)) {
1278 /* Prenormalization required. */
1279 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280 resultp4);
1281 result_exponent++;
1282 } /* end if hiddenoverflow... */
1283 } /* end else ...add magnitudes... */
1285 /* Round the result. If the extension and lower two words are
1286 * all zeros, then the result is exact. Otherwise round in the
1287 * correct direction. Underflow is possible. If a postnormalization
1288 * is necessary, then the mantissa is all zeros so no shift is needed.
1290 round:
1291 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293 result_exponent,is_tiny);
1295 Dbl_set_sign(resultp1,/*using*/sign_save);
1296 if (Dblext_isnotzero_mantissap3(resultp3) ||
1297 Dblext_isnotzero_mantissap4(resultp4)) {
1298 inexact = TRUE;
1299 switch(Rounding_mode()) {
1300 case ROUNDNEAREST: /* The default. */
1301 if (Dblext_isone_highp3(resultp3)) {
1302 /* at least 1/2 ulp */
1303 if (Dblext_isnotzero_low31p3(resultp3) ||
1304 Dblext_isnotzero_mantissap4(resultp4) ||
1305 Dblext_isone_lowp2(resultp2)) {
1306 /* either exactly half way and odd or
1307 * more than 1/2ulp */
1308 Dbl_increment(resultp1,resultp2);
1311 break;
1313 case ROUNDPLUS:
1314 if (Dbl_iszero_sign(resultp1)) {
1315 /* Round up positive results */
1316 Dbl_increment(resultp1,resultp2);
1318 break;
1320 case ROUNDMINUS:
1321 if (Dbl_isone_sign(resultp1)) {
1322 /* Round down negative results */
1323 Dbl_increment(resultp1,resultp2);
1326 case ROUNDZERO:;
1327 /* truncate is simple */
1328 } /* end switch... */
1329 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1331 if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332 /* Overflow */
1333 if (Is_overflowtrap_enabled()) {
1335 * Adjust bias of result
1337 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338 Dbl_copytoptr(resultp1,resultp2,dstptr);
1339 if (inexact)
1340 if (Is_inexacttrap_enabled())
1341 return (OPC_2E_OVERFLOWEXCEPTION |
1342 OPC_2E_INEXACTEXCEPTION);
1343 else Set_inexactflag();
1344 return (OPC_2E_OVERFLOWEXCEPTION);
1346 inexact = TRUE;
1347 Set_overflowflag();
1348 Dbl_setoverflow(resultp1,resultp2);
1349 } else if (result_exponent <= 0) { /* underflow case */
1350 if (Is_underflowtrap_enabled()) {
1352 * Adjust bias of result
1354 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355 Dbl_copytoptr(resultp1,resultp2,dstptr);
1356 if (inexact)
1357 if (Is_inexacttrap_enabled())
1358 return (OPC_2E_UNDERFLOWEXCEPTION |
1359 OPC_2E_INEXACTEXCEPTION);
1360 else Set_inexactflag();
1361 return(OPC_2E_UNDERFLOWEXCEPTION);
1363 else if (inexact && is_tiny) Set_underflowflag();
1365 else Dbl_set_exponent(resultp1,result_exponent);
1366 Dbl_copytoptr(resultp1,resultp2,dstptr);
1367 if (inexact)
1368 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369 else Set_inexactflag();
1370 return(NOEXCEPTION);
1374 * Single Floating-point Multiply Fused Add
1377 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1379 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380 unsigned int *status;
1382 unsigned int opnd1, opnd2, opnd3;
1383 register unsigned int tmpresp1, tmpresp2;
1384 unsigned int rightp1, rightp2;
1385 unsigned int resultp1, resultp2 = 0;
1386 register int mpy_exponent, add_exponent, count;
1387 boolean inexact = FALSE, is_tiny = FALSE;
1389 unsigned int signlessleft1, signlessright1, save;
1390 register int result_exponent, diff_exponent;
1391 int sign_save, jumpsize;
1393 Sgl_copyfromptr(src1ptr,opnd1);
1394 Sgl_copyfromptr(src2ptr,opnd2);
1395 Sgl_copyfromptr(src3ptr,opnd3);
1398 * set sign bit of result of multiply
1400 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1401 Sgl_setnegativezero(resultp1);
1402 else Sgl_setzero(resultp1);
1405 * Generate multiply exponent
1407 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1410 * check first operand for NaN's or infinity
1412 if (Sgl_isinfinity_exponent(opnd1)) {
1413 if (Sgl_iszero_mantissa(opnd1)) {
1414 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415 if (Sgl_iszero_exponentmantissa(opnd2)) {
1417 * invalid since operands are infinity
1418 * and zero
1420 if (Is_invalidtrap_enabled())
1421 return(OPC_2E_INVALIDEXCEPTION);
1422 Set_invalidflag();
1423 Sgl_makequietnan(resultp1);
1424 Sgl_copytoptr(resultp1,dstptr);
1425 return(NOEXCEPTION);
1428 * Check third operand for infinity with a
1429 * sign opposite of the multiply result
1431 if (Sgl_isinfinity(opnd3) &&
1432 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1434 * invalid since attempting a magnitude
1435 * subtraction of infinities
1437 if (Is_invalidtrap_enabled())
1438 return(OPC_2E_INVALIDEXCEPTION);
1439 Set_invalidflag();
1440 Sgl_makequietnan(resultp1);
1441 Sgl_copytoptr(resultp1,dstptr);
1442 return(NOEXCEPTION);
1446 * return infinity
1448 Sgl_setinfinity_exponentmantissa(resultp1);
1449 Sgl_copytoptr(resultp1,dstptr);
1450 return(NOEXCEPTION);
1453 else {
1455 * is NaN; signaling or quiet?
1457 if (Sgl_isone_signaling(opnd1)) {
1458 /* trap if INVALIDTRAP enabled */
1459 if (Is_invalidtrap_enabled())
1460 return(OPC_2E_INVALIDEXCEPTION);
1461 /* make NaN quiet */
1462 Set_invalidflag();
1463 Sgl_set_quiet(opnd1);
1466 * is second operand a signaling NaN?
1468 else if (Sgl_is_signalingnan(opnd2)) {
1469 /* trap if INVALIDTRAP enabled */
1470 if (Is_invalidtrap_enabled())
1471 return(OPC_2E_INVALIDEXCEPTION);
1472 /* make NaN quiet */
1473 Set_invalidflag();
1474 Sgl_set_quiet(opnd2);
1475 Sgl_copytoptr(opnd2,dstptr);
1476 return(NOEXCEPTION);
1479 * is third operand a signaling NaN?
1481 else if (Sgl_is_signalingnan(opnd3)) {
1482 /* trap if INVALIDTRAP enabled */
1483 if (Is_invalidtrap_enabled())
1484 return(OPC_2E_INVALIDEXCEPTION);
1485 /* make NaN quiet */
1486 Set_invalidflag();
1487 Sgl_set_quiet(opnd3);
1488 Sgl_copytoptr(opnd3,dstptr);
1489 return(NOEXCEPTION);
1492 * return quiet NaN
1494 Sgl_copytoptr(opnd1,dstptr);
1495 return(NOEXCEPTION);
1500 * check second operand for NaN's or infinity
1502 if (Sgl_isinfinity_exponent(opnd2)) {
1503 if (Sgl_iszero_mantissa(opnd2)) {
1504 if (Sgl_isnotnan(opnd3)) {
1505 if (Sgl_iszero_exponentmantissa(opnd1)) {
1507 * invalid since multiply operands are
1508 * zero & infinity
1510 if (Is_invalidtrap_enabled())
1511 return(OPC_2E_INVALIDEXCEPTION);
1512 Set_invalidflag();
1513 Sgl_makequietnan(opnd2);
1514 Sgl_copytoptr(opnd2,dstptr);
1515 return(NOEXCEPTION);
1519 * Check third operand for infinity with a
1520 * sign opposite of the multiply result
1522 if (Sgl_isinfinity(opnd3) &&
1523 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1525 * invalid since attempting a magnitude
1526 * subtraction of infinities
1528 if (Is_invalidtrap_enabled())
1529 return(OPC_2E_INVALIDEXCEPTION);
1530 Set_invalidflag();
1531 Sgl_makequietnan(resultp1);
1532 Sgl_copytoptr(resultp1,dstptr);
1533 return(NOEXCEPTION);
1537 * return infinity
1539 Sgl_setinfinity_exponentmantissa(resultp1);
1540 Sgl_copytoptr(resultp1,dstptr);
1541 return(NOEXCEPTION);
1544 else {
1546 * is NaN; signaling or quiet?
1548 if (Sgl_isone_signaling(opnd2)) {
1549 /* trap if INVALIDTRAP enabled */
1550 if (Is_invalidtrap_enabled())
1551 return(OPC_2E_INVALIDEXCEPTION);
1552 /* make NaN quiet */
1553 Set_invalidflag();
1554 Sgl_set_quiet(opnd2);
1557 * is third operand a signaling NaN?
1559 else if (Sgl_is_signalingnan(opnd3)) {
1560 /* trap if INVALIDTRAP enabled */
1561 if (Is_invalidtrap_enabled())
1562 return(OPC_2E_INVALIDEXCEPTION);
1563 /* make NaN quiet */
1564 Set_invalidflag();
1565 Sgl_set_quiet(opnd3);
1566 Sgl_copytoptr(opnd3,dstptr);
1567 return(NOEXCEPTION);
1570 * return quiet NaN
1572 Sgl_copytoptr(opnd2,dstptr);
1573 return(NOEXCEPTION);
1578 * check third operand for NaN's or infinity
1580 if (Sgl_isinfinity_exponent(opnd3)) {
1581 if (Sgl_iszero_mantissa(opnd3)) {
1582 /* return infinity */
1583 Sgl_copytoptr(opnd3,dstptr);
1584 return(NOEXCEPTION);
1585 } else {
1587 * is NaN; signaling or quiet?
1589 if (Sgl_isone_signaling(opnd3)) {
1590 /* trap if INVALIDTRAP enabled */
1591 if (Is_invalidtrap_enabled())
1592 return(OPC_2E_INVALIDEXCEPTION);
1593 /* make NaN quiet */
1594 Set_invalidflag();
1595 Sgl_set_quiet(opnd3);
1598 * return quiet NaN
1600 Sgl_copytoptr(opnd3,dstptr);
1601 return(NOEXCEPTION);
1606 * Generate multiply mantissa
1608 if (Sgl_isnotzero_exponent(opnd1)) {
1609 /* set hidden bit */
1610 Sgl_clear_signexponent_set_hidden(opnd1);
1612 else {
1613 /* check for zero */
1614 if (Sgl_iszero_mantissa(opnd1)) {
1616 * Perform the add opnd3 with zero here.
1618 if (Sgl_iszero_exponentmantissa(opnd3)) {
1619 if (Is_rounding_mode(ROUNDMINUS)) {
1620 Sgl_or_signs(opnd3,resultp1);
1621 } else {
1622 Sgl_and_signs(opnd3,resultp1);
1626 * Now let's check for trapped underflow case.
1628 else if (Sgl_iszero_exponent(opnd3) &&
1629 Is_underflowtrap_enabled()) {
1630 /* need to normalize results mantissa */
1631 sign_save = Sgl_signextendedsign(opnd3);
1632 result_exponent = 0;
1633 Sgl_leftshiftby1(opnd3);
1634 Sgl_normalize(opnd3,result_exponent);
1635 Sgl_set_sign(opnd3,/*using*/sign_save);
1636 Sgl_setwrapped_exponent(opnd3,result_exponent,
1637 unfl);
1638 Sgl_copytoptr(opnd3,dstptr);
1639 /* inexact = FALSE */
1640 return(OPC_2E_UNDERFLOWEXCEPTION);
1642 Sgl_copytoptr(opnd3,dstptr);
1643 return(NOEXCEPTION);
1645 /* is denormalized, adjust exponent */
1646 Sgl_clear_signexponent(opnd1);
1647 Sgl_leftshiftby1(opnd1);
1648 Sgl_normalize(opnd1,mpy_exponent);
1650 /* opnd2 needs to have hidden bit set with msb in hidden bit */
1651 if (Sgl_isnotzero_exponent(opnd2)) {
1652 Sgl_clear_signexponent_set_hidden(opnd2);
1654 else {
1655 /* check for zero */
1656 if (Sgl_iszero_mantissa(opnd2)) {
1658 * Perform the add opnd3 with zero here.
1660 if (Sgl_iszero_exponentmantissa(opnd3)) {
1661 if (Is_rounding_mode(ROUNDMINUS)) {
1662 Sgl_or_signs(opnd3,resultp1);
1663 } else {
1664 Sgl_and_signs(opnd3,resultp1);
1668 * Now let's check for trapped underflow case.
1670 else if (Sgl_iszero_exponent(opnd3) &&
1671 Is_underflowtrap_enabled()) {
1672 /* need to normalize results mantissa */
1673 sign_save = Sgl_signextendedsign(opnd3);
1674 result_exponent = 0;
1675 Sgl_leftshiftby1(opnd3);
1676 Sgl_normalize(opnd3,result_exponent);
1677 Sgl_set_sign(opnd3,/*using*/sign_save);
1678 Sgl_setwrapped_exponent(opnd3,result_exponent,
1679 unfl);
1680 Sgl_copytoptr(opnd3,dstptr);
1681 /* inexact = FALSE */
1682 return(OPC_2E_UNDERFLOWEXCEPTION);
1684 Sgl_copytoptr(opnd3,dstptr);
1685 return(NOEXCEPTION);
1687 /* is denormalized; want to normalize */
1688 Sgl_clear_signexponent(opnd2);
1689 Sgl_leftshiftby1(opnd2);
1690 Sgl_normalize(opnd2,mpy_exponent);
1693 /* Multiply the first two source mantissas together */
1696 * The intermediate result will be kept in tmpres,
1697 * which needs enough room for 106 bits of mantissa,
1698 * so lets call it a Double extended.
1700 Sglext_setzero(tmpresp1,tmpresp2);
1703 * Four bits at a time are inspected in each loop, and a
1704 * simple shift and add multiply algorithm is used.
1706 for (count = SGL_P-1; count >= 0; count -= 4) {
1707 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708 if (Sbit28(opnd1)) {
1709 /* Twoword_add should be an ADD followed by 2 ADDC's */
1710 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1712 if (Sbit29(opnd1)) {
1713 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1715 if (Sbit30(opnd1)) {
1716 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1718 if (Sbit31(opnd1)) {
1719 Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1721 Sgl_rightshiftby4(opnd1);
1723 if (Is_sexthiddenoverflow(tmpresp1)) {
1724 /* result mantissa >= 2 (mantissa overflow) */
1725 mpy_exponent++;
1726 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727 } else {
1728 Sglext_rightshiftby3(tmpresp1,tmpresp2);
1732 * Restore the sign of the mpy result which was saved in resultp1.
1733 * The exponent will continue to be kept in mpy_exponent.
1735 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1738 * No rounding is required, since the result of the multiply
1739 * is exact in the extended format.
1743 * Now we are ready to perform the add portion of the operation.
1745 * The exponents need to be kept as integers for now, since the
1746 * multiply result might not fit into the exponent field. We
1747 * can't overflow or underflow because of this yet, since the
1748 * add could bring the final result back into range.
1750 add_exponent = Sgl_exponent(opnd3);
1753 * Check for denormalized or zero add operand.
1755 if (add_exponent == 0) {
1756 /* check for zero */
1757 if (Sgl_iszero_mantissa(opnd3)) {
1758 /* right is zero */
1759 /* Left can't be zero and must be result.
1761 * The final result is now in tmpres and mpy_exponent,
1762 * and needs to be rounded and squeezed back into
1763 * double precision format from double extended.
1765 result_exponent = mpy_exponent;
1766 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768 goto round;
1772 * Neither are zeroes.
1773 * Adjust exponent and normalize add operand.
1775 sign_save = Sgl_signextendedsign(opnd3); /* save sign */
1776 Sgl_clear_signexponent(opnd3);
1777 Sgl_leftshiftby1(opnd3);
1778 Sgl_normalize(opnd3,add_exponent);
1779 Sgl_set_sign(opnd3,sign_save); /* restore sign */
1780 } else {
1781 Sgl_clear_exponent_set_hidden(opnd3);
1784 * Copy opnd3 to the double extended variable called right.
1786 Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1789 * A zero "save" helps discover equal operands (for later),
1790 * and is used in swapping operands (if needed).
1792 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1795 * Compare magnitude of operands.
1797 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798 Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800 Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1802 * Set the left operand to the larger one by XOR swap.
1803 * First finish the first word "save".
1805 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807 Sglext_swap_lower(tmpresp2,rightp2);
1808 /* also setup exponents used in rest of routine */
1809 diff_exponent = add_exponent - mpy_exponent;
1810 result_exponent = add_exponent;
1811 } else {
1812 /* also setup exponents used in rest of routine */
1813 diff_exponent = mpy_exponent - add_exponent;
1814 result_exponent = mpy_exponent;
1816 /* Invariant: left is not smaller than right. */
1819 * Special case alignment of operands that would force alignment
1820 * beyond the extent of the extension. A further optimization
1821 * could special case this but only reduces the path length for
1822 * this infrequent case.
1824 if (diff_exponent > SGLEXT_THRESHOLD) {
1825 diff_exponent = SGLEXT_THRESHOLD;
1828 /* Align right operand by shifting it to the right */
1829 Sglext_clear_sign(rightp1);
1830 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1832 /* Treat sum and difference of the operands separately. */
1833 if ((int)save < 0) {
1835 * Difference of the two operands. Overflow can occur if the
1836 * multiply overflowed. A borrow can occur out of the hidden
1837 * bit and force a post normalization phase.
1839 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840 resultp1,resultp2);
1841 sign_save = Sgl_signextendedsign(resultp1);
1842 if (Sgl_iszero_hidden(resultp1)) {
1843 /* Handle normalization */
1844 /* A straightforward algorithm would now shift the
1845 * result and extension left until the hidden bit
1846 * becomes one. Not all of the extension bits need
1847 * participate in the shift. Only the two most
1848 * significant bits (round and guard) are needed.
1849 * If only a single shift is needed then the guard
1850 * bit becomes a significant low order bit and the
1851 * extension must participate in the rounding.
1852 * If more than a single shift is needed, then all
1853 * bits to the right of the guard bit are zeros,
1854 * and the guard bit may or may not be zero. */
1855 Sglext_leftshiftby1(resultp1,resultp2);
1857 /* Need to check for a zero result. The sign and
1858 * exponent fields have already been zeroed. The more
1859 * efficient test of the full object can be used.
1861 if (Sglext_iszero(resultp1,resultp2)) {
1862 /* Must have been "x-x" or "x+(-x)". */
1863 if (Is_rounding_mode(ROUNDMINUS))
1864 Sgl_setone_sign(resultp1);
1865 Sgl_copytoptr(resultp1,dstptr);
1866 return(NOEXCEPTION);
1868 result_exponent--;
1870 /* Look to see if normalization is finished. */
1871 if (Sgl_isone_hidden(resultp1)) {
1872 /* No further normalization is needed */
1873 goto round;
1876 /* Discover first one bit to determine shift amount.
1877 * Use a modified binary search. We have already
1878 * shifted the result one position right and still
1879 * not found a one so the remainder of the extension
1880 * must be zero and simplifies rounding. */
1881 /* Scan bytes */
1882 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883 Sglext_leftshiftby8(resultp1,resultp2);
1884 result_exponent -= 8;
1886 /* Now narrow it down to the nibble */
1887 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888 /* The lower nibble contains the
1889 * normalizing one */
1890 Sglext_leftshiftby4(resultp1,resultp2);
1891 result_exponent -= 4;
1893 /* Select case where first bit is set (already
1894 * normalized) otherwise select the proper shift. */
1895 jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896 if (jumpsize <= 7) switch(jumpsize) {
1897 case 1:
1898 Sglext_leftshiftby3(resultp1,resultp2);
1899 result_exponent -= 3;
1900 break;
1901 case 2:
1902 case 3:
1903 Sglext_leftshiftby2(resultp1,resultp2);
1904 result_exponent -= 2;
1905 break;
1906 case 4:
1907 case 5:
1908 case 6:
1909 case 7:
1910 Sglext_leftshiftby1(resultp1,resultp2);
1911 result_exponent -= 1;
1912 break;
1914 } /* end if (hidden...)... */
1915 /* Fall through and round */
1916 } /* end if (save < 0)... */
1917 else {
1918 /* Add magnitudes */
1919 Sglext_addition(tmpresp1,tmpresp2,
1920 rightp1,rightp2, /*to*/resultp1,resultp2);
1921 sign_save = Sgl_signextendedsign(resultp1);
1922 if (Sgl_isone_hiddenoverflow(resultp1)) {
1923 /* Prenormalization required. */
1924 Sglext_arithrightshiftby1(resultp1,resultp2);
1925 result_exponent++;
1926 } /* end if hiddenoverflow... */
1927 } /* end else ...add magnitudes... */
1929 /* Round the result. If the extension and lower two words are
1930 * all zeros, then the result is exact. Otherwise round in the
1931 * correct direction. Underflow is possible. If a postnormalization
1932 * is necessary, then the mantissa is all zeros so no shift is needed.
1934 round:
1935 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1938 Sgl_set_sign(resultp1,/*using*/sign_save);
1939 if (Sglext_isnotzero_mantissap2(resultp2)) {
1940 inexact = TRUE;
1941 switch(Rounding_mode()) {
1942 case ROUNDNEAREST: /* The default. */
1943 if (Sglext_isone_highp2(resultp2)) {
1944 /* at least 1/2 ulp */
1945 if (Sglext_isnotzero_low31p2(resultp2) ||
1946 Sglext_isone_lowp1(resultp1)) {
1947 /* either exactly half way and odd or
1948 * more than 1/2ulp */
1949 Sgl_increment(resultp1);
1952 break;
1954 case ROUNDPLUS:
1955 if (Sgl_iszero_sign(resultp1)) {
1956 /* Round up positive results */
1957 Sgl_increment(resultp1);
1959 break;
1961 case ROUNDMINUS:
1962 if (Sgl_isone_sign(resultp1)) {
1963 /* Round down negative results */
1964 Sgl_increment(resultp1);
1967 case ROUNDZERO:;
1968 /* truncate is simple */
1969 } /* end switch... */
1970 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1972 if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973 /* Overflow */
1974 if (Is_overflowtrap_enabled()) {
1976 * Adjust bias of result
1978 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979 Sgl_copytoptr(resultp1,dstptr);
1980 if (inexact)
1981 if (Is_inexacttrap_enabled())
1982 return (OPC_2E_OVERFLOWEXCEPTION |
1983 OPC_2E_INEXACTEXCEPTION);
1984 else Set_inexactflag();
1985 return (OPC_2E_OVERFLOWEXCEPTION);
1987 inexact = TRUE;
1988 Set_overflowflag();
1989 Sgl_setoverflow(resultp1);
1990 } else if (result_exponent <= 0) { /* underflow case */
1991 if (Is_underflowtrap_enabled()) {
1993 * Adjust bias of result
1995 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996 Sgl_copytoptr(resultp1,dstptr);
1997 if (inexact)
1998 if (Is_inexacttrap_enabled())
1999 return (OPC_2E_UNDERFLOWEXCEPTION |
2000 OPC_2E_INEXACTEXCEPTION);
2001 else Set_inexactflag();
2002 return(OPC_2E_UNDERFLOWEXCEPTION);
2004 else if (inexact && is_tiny) Set_underflowflag();
2006 else Sgl_set_exponent(resultp1,result_exponent);
2007 Sgl_copytoptr(resultp1,dstptr);
2008 if (inexact)
2009 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010 else Set_inexactflag();
2011 return(NOEXCEPTION);
2015 * Single Floating-point Multiply Negate Fused Add
2018 sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2020 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021 unsigned int *status;
2023 unsigned int opnd1, opnd2, opnd3;
2024 register unsigned int tmpresp1, tmpresp2;
2025 unsigned int rightp1, rightp2;
2026 unsigned int resultp1, resultp2 = 0;
2027 register int mpy_exponent, add_exponent, count;
2028 boolean inexact = FALSE, is_tiny = FALSE;
2030 unsigned int signlessleft1, signlessright1, save;
2031 register int result_exponent, diff_exponent;
2032 int sign_save, jumpsize;
2034 Sgl_copyfromptr(src1ptr,opnd1);
2035 Sgl_copyfromptr(src2ptr,opnd2);
2036 Sgl_copyfromptr(src3ptr,opnd3);
2039 * set sign bit of result of multiply
2041 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2042 Sgl_setzero(resultp1);
2043 else
2044 Sgl_setnegativezero(resultp1);
2047 * Generate multiply exponent
2049 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2052 * check first operand for NaN's or infinity
2054 if (Sgl_isinfinity_exponent(opnd1)) {
2055 if (Sgl_iszero_mantissa(opnd1)) {
2056 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057 if (Sgl_iszero_exponentmantissa(opnd2)) {
2059 * invalid since operands are infinity
2060 * and zero
2062 if (Is_invalidtrap_enabled())
2063 return(OPC_2E_INVALIDEXCEPTION);
2064 Set_invalidflag();
2065 Sgl_makequietnan(resultp1);
2066 Sgl_copytoptr(resultp1,dstptr);
2067 return(NOEXCEPTION);
2070 * Check third operand for infinity with a
2071 * sign opposite of the multiply result
2073 if (Sgl_isinfinity(opnd3) &&
2074 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2076 * invalid since attempting a magnitude
2077 * subtraction of infinities
2079 if (Is_invalidtrap_enabled())
2080 return(OPC_2E_INVALIDEXCEPTION);
2081 Set_invalidflag();
2082 Sgl_makequietnan(resultp1);
2083 Sgl_copytoptr(resultp1,dstptr);
2084 return(NOEXCEPTION);
2088 * return infinity
2090 Sgl_setinfinity_exponentmantissa(resultp1);
2091 Sgl_copytoptr(resultp1,dstptr);
2092 return(NOEXCEPTION);
2095 else {
2097 * is NaN; signaling or quiet?
2099 if (Sgl_isone_signaling(opnd1)) {
2100 /* trap if INVALIDTRAP enabled */
2101 if (Is_invalidtrap_enabled())
2102 return(OPC_2E_INVALIDEXCEPTION);
2103 /* make NaN quiet */
2104 Set_invalidflag();
2105 Sgl_set_quiet(opnd1);
2108 * is second operand a signaling NaN?
2110 else if (Sgl_is_signalingnan(opnd2)) {
2111 /* trap if INVALIDTRAP enabled */
2112 if (Is_invalidtrap_enabled())
2113 return(OPC_2E_INVALIDEXCEPTION);
2114 /* make NaN quiet */
2115 Set_invalidflag();
2116 Sgl_set_quiet(opnd2);
2117 Sgl_copytoptr(opnd2,dstptr);
2118 return(NOEXCEPTION);
2121 * is third operand a signaling NaN?
2123 else if (Sgl_is_signalingnan(opnd3)) {
2124 /* trap if INVALIDTRAP enabled */
2125 if (Is_invalidtrap_enabled())
2126 return(OPC_2E_INVALIDEXCEPTION);
2127 /* make NaN quiet */
2128 Set_invalidflag();
2129 Sgl_set_quiet(opnd3);
2130 Sgl_copytoptr(opnd3,dstptr);
2131 return(NOEXCEPTION);
2134 * return quiet NaN
2136 Sgl_copytoptr(opnd1,dstptr);
2137 return(NOEXCEPTION);
2142 * check second operand for NaN's or infinity
2144 if (Sgl_isinfinity_exponent(opnd2)) {
2145 if (Sgl_iszero_mantissa(opnd2)) {
2146 if (Sgl_isnotnan(opnd3)) {
2147 if (Sgl_iszero_exponentmantissa(opnd1)) {
2149 * invalid since multiply operands are
2150 * zero & infinity
2152 if (Is_invalidtrap_enabled())
2153 return(OPC_2E_INVALIDEXCEPTION);
2154 Set_invalidflag();
2155 Sgl_makequietnan(opnd2);
2156 Sgl_copytoptr(opnd2,dstptr);
2157 return(NOEXCEPTION);
2161 * Check third operand for infinity with a
2162 * sign opposite of the multiply result
2164 if (Sgl_isinfinity(opnd3) &&
2165 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2167 * invalid since attempting a magnitude
2168 * subtraction of infinities
2170 if (Is_invalidtrap_enabled())
2171 return(OPC_2E_INVALIDEXCEPTION);
2172 Set_invalidflag();
2173 Sgl_makequietnan(resultp1);
2174 Sgl_copytoptr(resultp1,dstptr);
2175 return(NOEXCEPTION);
2179 * return infinity
2181 Sgl_setinfinity_exponentmantissa(resultp1);
2182 Sgl_copytoptr(resultp1,dstptr);
2183 return(NOEXCEPTION);
2186 else {
2188 * is NaN; signaling or quiet?
2190 if (Sgl_isone_signaling(opnd2)) {
2191 /* trap if INVALIDTRAP enabled */
2192 if (Is_invalidtrap_enabled())
2193 return(OPC_2E_INVALIDEXCEPTION);
2194 /* make NaN quiet */
2195 Set_invalidflag();
2196 Sgl_set_quiet(opnd2);
2199 * is third operand a signaling NaN?
2201 else if (Sgl_is_signalingnan(opnd3)) {
2202 /* trap if INVALIDTRAP enabled */
2203 if (Is_invalidtrap_enabled())
2204 return(OPC_2E_INVALIDEXCEPTION);
2205 /* make NaN quiet */
2206 Set_invalidflag();
2207 Sgl_set_quiet(opnd3);
2208 Sgl_copytoptr(opnd3,dstptr);
2209 return(NOEXCEPTION);
2212 * return quiet NaN
2214 Sgl_copytoptr(opnd2,dstptr);
2215 return(NOEXCEPTION);
2220 * check third operand for NaN's or infinity
2222 if (Sgl_isinfinity_exponent(opnd3)) {
2223 if (Sgl_iszero_mantissa(opnd3)) {
2224 /* return infinity */
2225 Sgl_copytoptr(opnd3,dstptr);
2226 return(NOEXCEPTION);
2227 } else {
2229 * is NaN; signaling or quiet?
2231 if (Sgl_isone_signaling(opnd3)) {
2232 /* trap if INVALIDTRAP enabled */
2233 if (Is_invalidtrap_enabled())
2234 return(OPC_2E_INVALIDEXCEPTION);
2235 /* make NaN quiet */
2236 Set_invalidflag();
2237 Sgl_set_quiet(opnd3);
2240 * return quiet NaN
2242 Sgl_copytoptr(opnd3,dstptr);
2243 return(NOEXCEPTION);
2248 * Generate multiply mantissa
2250 if (Sgl_isnotzero_exponent(opnd1)) {
2251 /* set hidden bit */
2252 Sgl_clear_signexponent_set_hidden(opnd1);
2254 else {
2255 /* check for zero */
2256 if (Sgl_iszero_mantissa(opnd1)) {
2258 * Perform the add opnd3 with zero here.
2260 if (Sgl_iszero_exponentmantissa(opnd3)) {
2261 if (Is_rounding_mode(ROUNDMINUS)) {
2262 Sgl_or_signs(opnd3,resultp1);
2263 } else {
2264 Sgl_and_signs(opnd3,resultp1);
2268 * Now let's check for trapped underflow case.
2270 else if (Sgl_iszero_exponent(opnd3) &&
2271 Is_underflowtrap_enabled()) {
2272 /* need to normalize results mantissa */
2273 sign_save = Sgl_signextendedsign(opnd3);
2274 result_exponent = 0;
2275 Sgl_leftshiftby1(opnd3);
2276 Sgl_normalize(opnd3,result_exponent);
2277 Sgl_set_sign(opnd3,/*using*/sign_save);
2278 Sgl_setwrapped_exponent(opnd3,result_exponent,
2279 unfl);
2280 Sgl_copytoptr(opnd3,dstptr);
2281 /* inexact = FALSE */
2282 return(OPC_2E_UNDERFLOWEXCEPTION);
2284 Sgl_copytoptr(opnd3,dstptr);
2285 return(NOEXCEPTION);
2287 /* is denormalized, adjust exponent */
2288 Sgl_clear_signexponent(opnd1);
2289 Sgl_leftshiftby1(opnd1);
2290 Sgl_normalize(opnd1,mpy_exponent);
2292 /* opnd2 needs to have hidden bit set with msb in hidden bit */
2293 if (Sgl_isnotzero_exponent(opnd2)) {
2294 Sgl_clear_signexponent_set_hidden(opnd2);
2296 else {
2297 /* check for zero */
2298 if (Sgl_iszero_mantissa(opnd2)) {
2300 * Perform the add opnd3 with zero here.
2302 if (Sgl_iszero_exponentmantissa(opnd3)) {
2303 if (Is_rounding_mode(ROUNDMINUS)) {
2304 Sgl_or_signs(opnd3,resultp1);
2305 } else {
2306 Sgl_and_signs(opnd3,resultp1);
2310 * Now let's check for trapped underflow case.
2312 else if (Sgl_iszero_exponent(opnd3) &&
2313 Is_underflowtrap_enabled()) {
2314 /* need to normalize results mantissa */
2315 sign_save = Sgl_signextendedsign(opnd3);
2316 result_exponent = 0;
2317 Sgl_leftshiftby1(opnd3);
2318 Sgl_normalize(opnd3,result_exponent);
2319 Sgl_set_sign(opnd3,/*using*/sign_save);
2320 Sgl_setwrapped_exponent(opnd3,result_exponent,
2321 unfl);
2322 Sgl_copytoptr(opnd3,dstptr);
2323 /* inexact = FALSE */
2324 return(OPC_2E_UNDERFLOWEXCEPTION);
2326 Sgl_copytoptr(opnd3,dstptr);
2327 return(NOEXCEPTION);
2329 /* is denormalized; want to normalize */
2330 Sgl_clear_signexponent(opnd2);
2331 Sgl_leftshiftby1(opnd2);
2332 Sgl_normalize(opnd2,mpy_exponent);
2335 /* Multiply the first two source mantissas together */
2338 * The intermediate result will be kept in tmpres,
2339 * which needs enough room for 106 bits of mantissa,
2340 * so lets call it a Double extended.
2342 Sglext_setzero(tmpresp1,tmpresp2);
2345 * Four bits at a time are inspected in each loop, and a
2346 * simple shift and add multiply algorithm is used.
2348 for (count = SGL_P-1; count >= 0; count -= 4) {
2349 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350 if (Sbit28(opnd1)) {
2351 /* Twoword_add should be an ADD followed by 2 ADDC's */
2352 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2354 if (Sbit29(opnd1)) {
2355 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2357 if (Sbit30(opnd1)) {
2358 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2360 if (Sbit31(opnd1)) {
2361 Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2363 Sgl_rightshiftby4(opnd1);
2365 if (Is_sexthiddenoverflow(tmpresp1)) {
2366 /* result mantissa >= 2 (mantissa overflow) */
2367 mpy_exponent++;
2368 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369 } else {
2370 Sglext_rightshiftby3(tmpresp1,tmpresp2);
2374 * Restore the sign of the mpy result which was saved in resultp1.
2375 * The exponent will continue to be kept in mpy_exponent.
2377 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2380 * No rounding is required, since the result of the multiply
2381 * is exact in the extended format.
2385 * Now we are ready to perform the add portion of the operation.
2387 * The exponents need to be kept as integers for now, since the
2388 * multiply result might not fit into the exponent field. We
2389 * can't overflow or underflow because of this yet, since the
2390 * add could bring the final result back into range.
2392 add_exponent = Sgl_exponent(opnd3);
2395 * Check for denormalized or zero add operand.
2397 if (add_exponent == 0) {
2398 /* check for zero */
2399 if (Sgl_iszero_mantissa(opnd3)) {
2400 /* right is zero */
2401 /* Left can't be zero and must be result.
2403 * The final result is now in tmpres and mpy_exponent,
2404 * and needs to be rounded and squeezed back into
2405 * double precision format from double extended.
2407 result_exponent = mpy_exponent;
2408 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410 goto round;
2414 * Neither are zeroes.
2415 * Adjust exponent and normalize add operand.
2417 sign_save = Sgl_signextendedsign(opnd3); /* save sign */
2418 Sgl_clear_signexponent(opnd3);
2419 Sgl_leftshiftby1(opnd3);
2420 Sgl_normalize(opnd3,add_exponent);
2421 Sgl_set_sign(opnd3,sign_save); /* restore sign */
2422 } else {
2423 Sgl_clear_exponent_set_hidden(opnd3);
2426 * Copy opnd3 to the double extended variable called right.
2428 Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2431 * A zero "save" helps discover equal operands (for later),
2432 * and is used in swapping operands (if needed).
2434 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2437 * Compare magnitude of operands.
2439 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440 Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2442 Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2444 * Set the left operand to the larger one by XOR swap.
2445 * First finish the first word "save".
2447 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449 Sglext_swap_lower(tmpresp2,rightp2);
2450 /* also setup exponents used in rest of routine */
2451 diff_exponent = add_exponent - mpy_exponent;
2452 result_exponent = add_exponent;
2453 } else {
2454 /* also setup exponents used in rest of routine */
2455 diff_exponent = mpy_exponent - add_exponent;
2456 result_exponent = mpy_exponent;
2458 /* Invariant: left is not smaller than right. */
2461 * Special case alignment of operands that would force alignment
2462 * beyond the extent of the extension. A further optimization
2463 * could special case this but only reduces the path length for
2464 * this infrequent case.
2466 if (diff_exponent > SGLEXT_THRESHOLD) {
2467 diff_exponent = SGLEXT_THRESHOLD;
2470 /* Align right operand by shifting it to the right */
2471 Sglext_clear_sign(rightp1);
2472 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2474 /* Treat sum and difference of the operands separately. */
2475 if ((int)save < 0) {
2477 * Difference of the two operands. Overflow can occur if the
2478 * multiply overflowed. A borrow can occur out of the hidden
2479 * bit and force a post normalization phase.
2481 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482 resultp1,resultp2);
2483 sign_save = Sgl_signextendedsign(resultp1);
2484 if (Sgl_iszero_hidden(resultp1)) {
2485 /* Handle normalization */
2486 /* A straightforward algorithm would now shift the
2487 * result and extension left until the hidden bit
2488 * becomes one. Not all of the extension bits need
2489 * participate in the shift. Only the two most
2490 * significant bits (round and guard) are needed.
2491 * If only a single shift is needed then the guard
2492 * bit becomes a significant low order bit and the
2493 * extension must participate in the rounding.
2494 * If more than a single shift is needed, then all
2495 * bits to the right of the guard bit are zeros,
2496 * and the guard bit may or may not be zero. */
2497 Sglext_leftshiftby1(resultp1,resultp2);
2499 /* Need to check for a zero result. The sign and
2500 * exponent fields have already been zeroed. The more
2501 * efficient test of the full object can be used.
2503 if (Sglext_iszero(resultp1,resultp2)) {
2504 /* Must have been "x-x" or "x+(-x)". */
2505 if (Is_rounding_mode(ROUNDMINUS))
2506 Sgl_setone_sign(resultp1);
2507 Sgl_copytoptr(resultp1,dstptr);
2508 return(NOEXCEPTION);
2510 result_exponent--;
2512 /* Look to see if normalization is finished. */
2513 if (Sgl_isone_hidden(resultp1)) {
2514 /* No further normalization is needed */
2515 goto round;
2518 /* Discover first one bit to determine shift amount.
2519 * Use a modified binary search. We have already
2520 * shifted the result one position right and still
2521 * not found a one so the remainder of the extension
2522 * must be zero and simplifies rounding. */
2523 /* Scan bytes */
2524 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525 Sglext_leftshiftby8(resultp1,resultp2);
2526 result_exponent -= 8;
2528 /* Now narrow it down to the nibble */
2529 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530 /* The lower nibble contains the
2531 * normalizing one */
2532 Sglext_leftshiftby4(resultp1,resultp2);
2533 result_exponent -= 4;
2535 /* Select case where first bit is set (already
2536 * normalized) otherwise select the proper shift. */
2537 jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538 if (jumpsize <= 7) switch(jumpsize) {
2539 case 1:
2540 Sglext_leftshiftby3(resultp1,resultp2);
2541 result_exponent -= 3;
2542 break;
2543 case 2:
2544 case 3:
2545 Sglext_leftshiftby2(resultp1,resultp2);
2546 result_exponent -= 2;
2547 break;
2548 case 4:
2549 case 5:
2550 case 6:
2551 case 7:
2552 Sglext_leftshiftby1(resultp1,resultp2);
2553 result_exponent -= 1;
2554 break;
2556 } /* end if (hidden...)... */
2557 /* Fall through and round */
2558 } /* end if (save < 0)... */
2559 else {
2560 /* Add magnitudes */
2561 Sglext_addition(tmpresp1,tmpresp2,
2562 rightp1,rightp2, /*to*/resultp1,resultp2);
2563 sign_save = Sgl_signextendedsign(resultp1);
2564 if (Sgl_isone_hiddenoverflow(resultp1)) {
2565 /* Prenormalization required. */
2566 Sglext_arithrightshiftby1(resultp1,resultp2);
2567 result_exponent++;
2568 } /* end if hiddenoverflow... */
2569 } /* end else ...add magnitudes... */
2571 /* Round the result. If the extension and lower two words are
2572 * all zeros, then the result is exact. Otherwise round in the
2573 * correct direction. Underflow is possible. If a postnormalization
2574 * is necessary, then the mantissa is all zeros so no shift is needed.
2576 round:
2577 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2580 Sgl_set_sign(resultp1,/*using*/sign_save);
2581 if (Sglext_isnotzero_mantissap2(resultp2)) {
2582 inexact = TRUE;
2583 switch(Rounding_mode()) {
2584 case ROUNDNEAREST: /* The default. */
2585 if (Sglext_isone_highp2(resultp2)) {
2586 /* at least 1/2 ulp */
2587 if (Sglext_isnotzero_low31p2(resultp2) ||
2588 Sglext_isone_lowp1(resultp1)) {
2589 /* either exactly half way and odd or
2590 * more than 1/2ulp */
2591 Sgl_increment(resultp1);
2594 break;
2596 case ROUNDPLUS:
2597 if (Sgl_iszero_sign(resultp1)) {
2598 /* Round up positive results */
2599 Sgl_increment(resultp1);
2601 break;
2603 case ROUNDMINUS:
2604 if (Sgl_isone_sign(resultp1)) {
2605 /* Round down negative results */
2606 Sgl_increment(resultp1);
2609 case ROUNDZERO:;
2610 /* truncate is simple */
2611 } /* end switch... */
2612 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2614 if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615 /* Overflow */
2616 if (Is_overflowtrap_enabled()) {
2618 * Adjust bias of result
2620 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621 Sgl_copytoptr(resultp1,dstptr);
2622 if (inexact)
2623 if (Is_inexacttrap_enabled())
2624 return (OPC_2E_OVERFLOWEXCEPTION |
2625 OPC_2E_INEXACTEXCEPTION);
2626 else Set_inexactflag();
2627 return (OPC_2E_OVERFLOWEXCEPTION);
2629 inexact = TRUE;
2630 Set_overflowflag();
2631 Sgl_setoverflow(resultp1);
2632 } else if (result_exponent <= 0) { /* underflow case */
2633 if (Is_underflowtrap_enabled()) {
2635 * Adjust bias of result
2637 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638 Sgl_copytoptr(resultp1,dstptr);
2639 if (inexact)
2640 if (Is_inexacttrap_enabled())
2641 return (OPC_2E_UNDERFLOWEXCEPTION |
2642 OPC_2E_INEXACTEXCEPTION);
2643 else Set_inexactflag();
2644 return(OPC_2E_UNDERFLOWEXCEPTION);
2646 else if (inexact && is_tiny) Set_underflowflag();
2648 else Sgl_set_exponent(resultp1,result_exponent);
2649 Sgl_copytoptr(resultp1,dstptr);
2650 if (inexact)
2651 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652 else Set_inexactflag();
2653 return(NOEXCEPTION);