1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
5 * Floating-point emulation code
6 * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
12 * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
15 * Double Floating-point Multiply Fused Add
16 * Double Floating-point Multiply Negate Fused Add
17 * Single Floating-point Multiply Fused Add
18 * Single Floating-point Multiply Negate Fused Add
20 * External Interfaces:
21 * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
22 * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
23 * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
24 * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
26 * Internal Interfaces:
29 * <<please update with a overview of the operation of this file>>
36 #include "sgl_float.h"
37 #include "dbl_float.h"
41 * Double Floating-point Multiply Fused Add
46 dbl_floating_point
*src1ptr
,
47 dbl_floating_point
*src2ptr
,
48 dbl_floating_point
*src3ptr
,
50 dbl_floating_point
*dstptr
)
52 unsigned int opnd1p1
, opnd1p2
, opnd2p1
, opnd2p2
, opnd3p1
, opnd3p2
;
53 register unsigned int tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
;
54 unsigned int rightp1
, rightp2
, rightp3
, rightp4
;
55 unsigned int resultp1
, resultp2
= 0, resultp3
= 0, resultp4
= 0;
56 register int mpy_exponent
, add_exponent
, count
;
57 boolean inexact
= FALSE
, is_tiny
= FALSE
;
59 unsigned int signlessleft1
, signlessright1
, save
;
60 register int result_exponent
, diff_exponent
;
61 int sign_save
, jumpsize
;
63 Dbl_copyfromptr(src1ptr
,opnd1p1
,opnd1p2
);
64 Dbl_copyfromptr(src2ptr
,opnd2p1
,opnd2p2
);
65 Dbl_copyfromptr(src3ptr
,opnd3p1
,opnd3p2
);
68 * set sign bit of result of multiply
70 if (Dbl_sign(opnd1p1
) ^ Dbl_sign(opnd2p1
))
71 Dbl_setnegativezerop1(resultp1
);
72 else Dbl_setzerop1(resultp1
);
75 * Generate multiply exponent
77 mpy_exponent
= Dbl_exponent(opnd1p1
) + Dbl_exponent(opnd2p1
) - DBL_BIAS
;
80 * check first operand for NaN's or infinity
82 if (Dbl_isinfinity_exponent(opnd1p1
)) {
83 if (Dbl_iszero_mantissa(opnd1p1
,opnd1p2
)) {
84 if (Dbl_isnotnan(opnd2p1
,opnd2p2
) &&
85 Dbl_isnotnan(opnd3p1
,opnd3p2
)) {
86 if (Dbl_iszero_exponentmantissa(opnd2p1
,opnd2p2
)) {
88 * invalid since operands are infinity
91 if (Is_invalidtrap_enabled())
92 return(OPC_2E_INVALIDEXCEPTION
);
94 Dbl_makequietnan(resultp1
,resultp2
);
95 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
99 * Check third operand for infinity with a
100 * sign opposite of the multiply result
102 if (Dbl_isinfinity(opnd3p1
,opnd3p2
) &&
103 (Dbl_sign(resultp1
) ^ Dbl_sign(opnd3p1
))) {
105 * invalid since attempting a magnitude
106 * subtraction of infinities
108 if (Is_invalidtrap_enabled())
109 return(OPC_2E_INVALIDEXCEPTION
);
111 Dbl_makequietnan(resultp1
,resultp2
);
112 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
119 Dbl_setinfinity_exponentmantissa(resultp1
,resultp2
);
120 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
126 * is NaN; signaling or quiet?
128 if (Dbl_isone_signaling(opnd1p1
)) {
129 /* trap if INVALIDTRAP enabled */
130 if (Is_invalidtrap_enabled())
131 return(OPC_2E_INVALIDEXCEPTION
);
134 Dbl_set_quiet(opnd1p1
);
137 * is second operand a signaling NaN?
139 else if (Dbl_is_signalingnan(opnd2p1
)) {
140 /* trap if INVALIDTRAP enabled */
141 if (Is_invalidtrap_enabled())
142 return(OPC_2E_INVALIDEXCEPTION
);
145 Dbl_set_quiet(opnd2p1
);
146 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
150 * is third operand a signaling NaN?
152 else if (Dbl_is_signalingnan(opnd3p1
)) {
153 /* trap if INVALIDTRAP enabled */
154 if (Is_invalidtrap_enabled())
155 return(OPC_2E_INVALIDEXCEPTION
);
158 Dbl_set_quiet(opnd3p1
);
159 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
165 Dbl_copytoptr(opnd1p1
,opnd1p2
,dstptr
);
171 * check second operand for NaN's or infinity
173 if (Dbl_isinfinity_exponent(opnd2p1
)) {
174 if (Dbl_iszero_mantissa(opnd2p1
,opnd2p2
)) {
175 if (Dbl_isnotnan(opnd3p1
,opnd3p2
)) {
176 if (Dbl_iszero_exponentmantissa(opnd1p1
,opnd1p2
)) {
178 * invalid since multiply operands are
181 if (Is_invalidtrap_enabled())
182 return(OPC_2E_INVALIDEXCEPTION
);
184 Dbl_makequietnan(opnd2p1
,opnd2p2
);
185 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
190 * Check third operand for infinity with a
191 * sign opposite of the multiply result
193 if (Dbl_isinfinity(opnd3p1
,opnd3p2
) &&
194 (Dbl_sign(resultp1
) ^ Dbl_sign(opnd3p1
))) {
196 * invalid since attempting a magnitude
197 * subtraction of infinities
199 if (Is_invalidtrap_enabled())
200 return(OPC_2E_INVALIDEXCEPTION
);
202 Dbl_makequietnan(resultp1
,resultp2
);
203 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
210 Dbl_setinfinity_exponentmantissa(resultp1
,resultp2
);
211 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
217 * is NaN; signaling or quiet?
219 if (Dbl_isone_signaling(opnd2p1
)) {
220 /* trap if INVALIDTRAP enabled */
221 if (Is_invalidtrap_enabled())
222 return(OPC_2E_INVALIDEXCEPTION
);
225 Dbl_set_quiet(opnd2p1
);
228 * is third operand a signaling NaN?
230 else if (Dbl_is_signalingnan(opnd3p1
)) {
231 /* trap if INVALIDTRAP enabled */
232 if (Is_invalidtrap_enabled())
233 return(OPC_2E_INVALIDEXCEPTION
);
236 Dbl_set_quiet(opnd3p1
);
237 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
243 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
249 * check third operand for NaN's or infinity
251 if (Dbl_isinfinity_exponent(opnd3p1
)) {
252 if (Dbl_iszero_mantissa(opnd3p1
,opnd3p2
)) {
253 /* return infinity */
254 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
258 * is NaN; signaling or quiet?
260 if (Dbl_isone_signaling(opnd3p1
)) {
261 /* trap if INVALIDTRAP enabled */
262 if (Is_invalidtrap_enabled())
263 return(OPC_2E_INVALIDEXCEPTION
);
266 Dbl_set_quiet(opnd3p1
);
271 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
277 * Generate multiply mantissa
279 if (Dbl_isnotzero_exponent(opnd1p1
)) {
281 Dbl_clear_signexponent_set_hidden(opnd1p1
);
285 if (Dbl_iszero_mantissa(opnd1p1
,opnd1p2
)) {
287 * Perform the add opnd3 with zero here.
289 if (Dbl_iszero_exponentmantissa(opnd3p1
,opnd3p2
)) {
290 if (Is_rounding_mode(ROUNDMINUS
)) {
291 Dbl_or_signs(opnd3p1
,resultp1
);
293 Dbl_and_signs(opnd3p1
,resultp1
);
297 * Now let's check for trapped underflow case.
299 else if (Dbl_iszero_exponent(opnd3p1
) &&
300 Is_underflowtrap_enabled()) {
301 /* need to normalize results mantissa */
302 sign_save
= Dbl_signextendedsign(opnd3p1
);
304 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
305 Dbl_normalize(opnd3p1
,opnd3p2
,result_exponent
);
306 Dbl_set_sign(opnd3p1
,/*using*/sign_save
);
307 Dbl_setwrapped_exponent(opnd3p1
,result_exponent
,
309 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
310 /* inexact = FALSE */
311 return(OPC_2E_UNDERFLOWEXCEPTION
);
313 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
316 /* is denormalized, adjust exponent */
317 Dbl_clear_signexponent(opnd1p1
);
318 Dbl_leftshiftby1(opnd1p1
,opnd1p2
);
319 Dbl_normalize(opnd1p1
,opnd1p2
,mpy_exponent
);
321 /* opnd2 needs to have hidden bit set with msb in hidden bit */
322 if (Dbl_isnotzero_exponent(opnd2p1
)) {
323 Dbl_clear_signexponent_set_hidden(opnd2p1
);
327 if (Dbl_iszero_mantissa(opnd2p1
,opnd2p2
)) {
329 * Perform the add opnd3 with zero here.
331 if (Dbl_iszero_exponentmantissa(opnd3p1
,opnd3p2
)) {
332 if (Is_rounding_mode(ROUNDMINUS
)) {
333 Dbl_or_signs(opnd3p1
,resultp1
);
335 Dbl_and_signs(opnd3p1
,resultp1
);
339 * Now let's check for trapped underflow case.
341 else if (Dbl_iszero_exponent(opnd3p1
) &&
342 Is_underflowtrap_enabled()) {
343 /* need to normalize results mantissa */
344 sign_save
= Dbl_signextendedsign(opnd3p1
);
346 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
347 Dbl_normalize(opnd3p1
,opnd3p2
,result_exponent
);
348 Dbl_set_sign(opnd3p1
,/*using*/sign_save
);
349 Dbl_setwrapped_exponent(opnd3p1
,result_exponent
,
351 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
352 /* inexact = FALSE */
353 return(OPC_2E_UNDERFLOWEXCEPTION
);
355 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
358 /* is denormalized; want to normalize */
359 Dbl_clear_signexponent(opnd2p1
);
360 Dbl_leftshiftby1(opnd2p1
,opnd2p2
);
361 Dbl_normalize(opnd2p1
,opnd2p2
,mpy_exponent
);
364 /* Multiply the first two source mantissas together */
367 * The intermediate result will be kept in tmpres,
368 * which needs enough room for 106 bits of mantissa,
369 * so lets call it a Double extended.
371 Dblext_setzero(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
374 * Four bits at a time are inspected in each loop, and a
375 * simple shift and add multiply algorithm is used.
377 for (count
= DBL_P
-1; count
>= 0; count
-= 4) {
378 Dblext_rightshiftby4(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
379 if (Dbit28p2(opnd1p2
)) {
380 /* Fourword_add should be an ADD followed by 3 ADDC's */
381 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
382 opnd2p1
<<3 | opnd2p2
>>29, opnd2p2
<<3, 0, 0);
384 if (Dbit29p2(opnd1p2
)) {
385 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
386 opnd2p1
<<2 | opnd2p2
>>30, opnd2p2
<<2, 0, 0);
388 if (Dbit30p2(opnd1p2
)) {
389 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
390 opnd2p1
<<1 | opnd2p2
>>31, opnd2p2
<<1, 0, 0);
392 if (Dbit31p2(opnd1p2
)) {
393 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
394 opnd2p1
, opnd2p2
, 0, 0);
396 Dbl_rightshiftby4(opnd1p1
,opnd1p2
);
398 if (Is_dexthiddenoverflow(tmpresp1
)) {
399 /* result mantissa >= 2 (mantissa overflow) */
401 Dblext_rightshiftby1(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
405 * Restore the sign of the mpy result which was saved in resultp1.
406 * The exponent will continue to be kept in mpy_exponent.
408 Dblext_set_sign(tmpresp1
,Dbl_sign(resultp1
));
411 * No rounding is required, since the result of the multiply
412 * is exact in the extended format.
416 * Now we are ready to perform the add portion of the operation.
418 * The exponents need to be kept as integers for now, since the
419 * multiply result might not fit into the exponent field. We
420 * can't overflow or underflow because of this yet, since the
421 * add could bring the final result back into range.
423 add_exponent
= Dbl_exponent(opnd3p1
);
426 * Check for denormalized or zero add operand.
428 if (add_exponent
== 0) {
430 if (Dbl_iszero_mantissa(opnd3p1
,opnd3p2
)) {
432 /* Left can't be zero and must be result.
434 * The final result is now in tmpres and mpy_exponent,
435 * and needs to be rounded and squeezed back into
436 * double precision format from double extended.
438 result_exponent
= mpy_exponent
;
439 Dblext_copy(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
440 resultp1
,resultp2
,resultp3
,resultp4
);
441 sign_save
= Dbl_signextendedsign(resultp1
);/*save sign*/
446 * Neither are zeroes.
447 * Adjust exponent and normalize add operand.
449 sign_save
= Dbl_signextendedsign(opnd3p1
); /* save sign */
450 Dbl_clear_signexponent(opnd3p1
);
451 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
452 Dbl_normalize(opnd3p1
,opnd3p2
,add_exponent
);
453 Dbl_set_sign(opnd3p1
,sign_save
); /* restore sign */
455 Dbl_clear_exponent_set_hidden(opnd3p1
);
458 * Copy opnd3 to the double extended variable called right.
460 Dbl_copyto_dblext(opnd3p1
,opnd3p2
,rightp1
,rightp2
,rightp3
,rightp4
);
463 * A zero "save" helps discover equal operands (for later),
464 * and is used in swapping operands (if needed).
466 Dblext_xortointp1(tmpresp1
,rightp1
,/*to*/save
);
469 * Compare magnitude of operands.
471 Dblext_copytoint_exponentmantissap1(tmpresp1
,signlessleft1
);
472 Dblext_copytoint_exponentmantissap1(rightp1
,signlessright1
);
473 if (mpy_exponent
< add_exponent
|| mpy_exponent
== add_exponent
&&
474 Dblext_ismagnitudeless(tmpresp2
,rightp2
,signlessleft1
,signlessright1
)){
476 * Set the left operand to the larger one by XOR swap.
477 * First finish the first word "save".
479 Dblext_xorfromintp1(save
,rightp1
,/*to*/rightp1
);
480 Dblext_xorfromintp1(save
,tmpresp1
,/*to*/tmpresp1
);
481 Dblext_swap_lower(tmpresp2
,tmpresp3
,tmpresp4
,
482 rightp2
,rightp3
,rightp4
);
483 /* also setup exponents used in rest of routine */
484 diff_exponent
= add_exponent
- mpy_exponent
;
485 result_exponent
= add_exponent
;
487 /* also setup exponents used in rest of routine */
488 diff_exponent
= mpy_exponent
- add_exponent
;
489 result_exponent
= mpy_exponent
;
491 /* Invariant: left is not smaller than right. */
494 * Special case alignment of operands that would force alignment
495 * beyond the extent of the extension. A further optimization
496 * could special case this but only reduces the path length for
497 * this infrequent case.
499 if (diff_exponent
> DBLEXT_THRESHOLD
) {
500 diff_exponent
= DBLEXT_THRESHOLD
;
503 /* Align right operand by shifting it to the right */
504 Dblext_clear_sign(rightp1
);
505 Dblext_right_align(rightp1
,rightp2
,rightp3
,rightp4
,
506 /*shifted by*/diff_exponent
);
508 /* Treat sum and difference of the operands separately. */
511 * Difference of the two operands. Overflow can occur if the
512 * multiply overflowed. A borrow can occur out of the hidden
513 * bit and force a post normalization phase.
515 Dblext_subtract(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
516 rightp1
,rightp2
,rightp3
,rightp4
,
517 resultp1
,resultp2
,resultp3
,resultp4
);
518 sign_save
= Dbl_signextendedsign(resultp1
);
519 if (Dbl_iszero_hidden(resultp1
)) {
520 /* Handle normalization */
521 /* A straightforward algorithm would now shift the
522 * result and extension left until the hidden bit
523 * becomes one. Not all of the extension bits need
524 * participate in the shift. Only the two most
525 * significant bits (round and guard) are needed.
526 * If only a single shift is needed then the guard
527 * bit becomes a significant low order bit and the
528 * extension must participate in the rounding.
529 * If more than a single shift is needed, then all
530 * bits to the right of the guard bit are zeros,
531 * and the guard bit may or may not be zero. */
532 Dblext_leftshiftby1(resultp1
,resultp2
,resultp3
,
535 /* Need to check for a zero result. The sign and
536 * exponent fields have already been zeroed. The more
537 * efficient test of the full object can be used.
539 if(Dblext_iszero(resultp1
,resultp2
,resultp3
,resultp4
)){
540 /* Must have been "x-x" or "x+(-x)". */
541 if (Is_rounding_mode(ROUNDMINUS
))
542 Dbl_setone_sign(resultp1
);
543 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
548 /* Look to see if normalization is finished. */
549 if (Dbl_isone_hidden(resultp1
)) {
550 /* No further normalization is needed */
554 /* Discover first one bit to determine shift amount.
555 * Use a modified binary search. We have already
556 * shifted the result one position right and still
557 * not found a one so the remainder of the extension
558 * must be zero and simplifies rounding. */
560 while (Dbl_iszero_hiddenhigh7mantissa(resultp1
)) {
561 Dblext_leftshiftby8(resultp1
,resultp2
,resultp3
,resultp4
);
562 result_exponent
-= 8;
564 /* Now narrow it down to the nibble */
565 if (Dbl_iszero_hiddenhigh3mantissa(resultp1
)) {
566 /* The lower nibble contains the
568 Dblext_leftshiftby4(resultp1
,resultp2
,resultp3
,resultp4
);
569 result_exponent
-= 4;
571 /* Select case where first bit is set (already
572 * normalized) otherwise select the proper shift. */
573 jumpsize
= Dbl_hiddenhigh3mantissa(resultp1
);
574 if (jumpsize
<= 7) switch(jumpsize
) {
576 Dblext_leftshiftby3(resultp1
,resultp2
,resultp3
,
578 result_exponent
-= 3;
582 Dblext_leftshiftby2(resultp1
,resultp2
,resultp3
,
584 result_exponent
-= 2;
590 Dblext_leftshiftby1(resultp1
,resultp2
,resultp3
,
592 result_exponent
-= 1;
595 } /* end if (hidden...)... */
596 /* Fall through and round */
597 } /* end if (save < 0)... */
600 Dblext_addition(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
601 rightp1
,rightp2
,rightp3
,rightp4
,
602 /*to*/resultp1
,resultp2
,resultp3
,resultp4
);
603 sign_save
= Dbl_signextendedsign(resultp1
);
604 if (Dbl_isone_hiddenoverflow(resultp1
)) {
605 /* Prenormalization required. */
606 Dblext_arithrightshiftby1(resultp1
,resultp2
,resultp3
,
609 } /* end if hiddenoverflow... */
610 } /* end else ...add magnitudes... */
612 /* Round the result. If the extension and lower two words are
613 * all zeros, then the result is exact. Otherwise round in the
614 * correct direction. Underflow is possible. If a postnormalization
615 * is necessary, then the mantissa is all zeros so no shift is needed.
618 if (result_exponent
<= 0 && !Is_underflowtrap_enabled()) {
619 Dblext_denormalize(resultp1
,resultp2
,resultp3
,resultp4
,
620 result_exponent
,is_tiny
);
622 Dbl_set_sign(resultp1
,/*using*/sign_save
);
623 if (Dblext_isnotzero_mantissap3(resultp3
) ||
624 Dblext_isnotzero_mantissap4(resultp4
)) {
626 switch(Rounding_mode()) {
627 case ROUNDNEAREST
: /* The default. */
628 if (Dblext_isone_highp3(resultp3
)) {
629 /* at least 1/2 ulp */
630 if (Dblext_isnotzero_low31p3(resultp3
) ||
631 Dblext_isnotzero_mantissap4(resultp4
) ||
632 Dblext_isone_lowp2(resultp2
)) {
633 /* either exactly half way and odd or
634 * more than 1/2ulp */
635 Dbl_increment(resultp1
,resultp2
);
641 if (Dbl_iszero_sign(resultp1
)) {
642 /* Round up positive results */
643 Dbl_increment(resultp1
,resultp2
);
648 if (Dbl_isone_sign(resultp1
)) {
649 /* Round down negative results */
650 Dbl_increment(resultp1
,resultp2
);
654 /* truncate is simple */
655 } /* end switch... */
656 if (Dbl_isone_hiddenoverflow(resultp1
)) result_exponent
++;
658 if (result_exponent
>= DBL_INFINITY_EXPONENT
) {
659 /* trap if OVERFLOWTRAP enabled */
660 if (Is_overflowtrap_enabled()) {
662 * Adjust bias of result
664 Dbl_setwrapped_exponent(resultp1
,result_exponent
,ovfl
);
665 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
667 if (Is_inexacttrap_enabled())
668 return (OPC_2E_OVERFLOWEXCEPTION
|
669 OPC_2E_INEXACTEXCEPTION
);
670 else Set_inexactflag();
671 return (OPC_2E_OVERFLOWEXCEPTION
);
675 /* set result to infinity or largest number */
676 Dbl_setoverflow(resultp1
,resultp2
);
678 } else if (result_exponent
<= 0) { /* underflow case */
679 if (Is_underflowtrap_enabled()) {
681 * Adjust bias of result
683 Dbl_setwrapped_exponent(resultp1
,result_exponent
,unfl
);
684 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
686 if (Is_inexacttrap_enabled())
687 return (OPC_2E_UNDERFLOWEXCEPTION
|
688 OPC_2E_INEXACTEXCEPTION
);
689 else Set_inexactflag();
690 return(OPC_2E_UNDERFLOWEXCEPTION
);
692 else if (inexact
&& is_tiny
) Set_underflowflag();
694 else Dbl_set_exponent(resultp1
,result_exponent
);
695 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
697 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION
);
698 else Set_inexactflag();
703 * Double Floating-point Multiply Negate Fused Add
706 dbl_fmpynfadd(src1ptr
,src2ptr
,src3ptr
,status
,dstptr
)
708 dbl_floating_point
*src1ptr
, *src2ptr
, *src3ptr
, *dstptr
;
709 unsigned int *status
;
711 unsigned int opnd1p1
, opnd1p2
, opnd2p1
, opnd2p2
, opnd3p1
, opnd3p2
;
712 register unsigned int tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
;
713 unsigned int rightp1
, rightp2
, rightp3
, rightp4
;
714 unsigned int resultp1
, resultp2
= 0, resultp3
= 0, resultp4
= 0;
715 register int mpy_exponent
, add_exponent
, count
;
716 boolean inexact
= FALSE
, is_tiny
= FALSE
;
718 unsigned int signlessleft1
, signlessright1
, save
;
719 register int result_exponent
, diff_exponent
;
720 int sign_save
, jumpsize
;
722 Dbl_copyfromptr(src1ptr
,opnd1p1
,opnd1p2
);
723 Dbl_copyfromptr(src2ptr
,opnd2p1
,opnd2p2
);
724 Dbl_copyfromptr(src3ptr
,opnd3p1
,opnd3p2
);
727 * set sign bit of result of multiply
729 if (Dbl_sign(opnd1p1
) ^ Dbl_sign(opnd2p1
))
730 Dbl_setzerop1(resultp1
);
732 Dbl_setnegativezerop1(resultp1
);
735 * Generate multiply exponent
737 mpy_exponent
= Dbl_exponent(opnd1p1
) + Dbl_exponent(opnd2p1
) - DBL_BIAS
;
740 * check first operand for NaN's or infinity
742 if (Dbl_isinfinity_exponent(opnd1p1
)) {
743 if (Dbl_iszero_mantissa(opnd1p1
,opnd1p2
)) {
744 if (Dbl_isnotnan(opnd2p1
,opnd2p2
) &&
745 Dbl_isnotnan(opnd3p1
,opnd3p2
)) {
746 if (Dbl_iszero_exponentmantissa(opnd2p1
,opnd2p2
)) {
748 * invalid since operands are infinity
751 if (Is_invalidtrap_enabled())
752 return(OPC_2E_INVALIDEXCEPTION
);
754 Dbl_makequietnan(resultp1
,resultp2
);
755 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
759 * Check third operand for infinity with a
760 * sign opposite of the multiply result
762 if (Dbl_isinfinity(opnd3p1
,opnd3p2
) &&
763 (Dbl_sign(resultp1
) ^ Dbl_sign(opnd3p1
))) {
765 * invalid since attempting a magnitude
766 * subtraction of infinities
768 if (Is_invalidtrap_enabled())
769 return(OPC_2E_INVALIDEXCEPTION
);
771 Dbl_makequietnan(resultp1
,resultp2
);
772 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
779 Dbl_setinfinity_exponentmantissa(resultp1
,resultp2
);
780 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
786 * is NaN; signaling or quiet?
788 if (Dbl_isone_signaling(opnd1p1
)) {
789 /* trap if INVALIDTRAP enabled */
790 if (Is_invalidtrap_enabled())
791 return(OPC_2E_INVALIDEXCEPTION
);
794 Dbl_set_quiet(opnd1p1
);
797 * is second operand a signaling NaN?
799 else if (Dbl_is_signalingnan(opnd2p1
)) {
800 /* trap if INVALIDTRAP enabled */
801 if (Is_invalidtrap_enabled())
802 return(OPC_2E_INVALIDEXCEPTION
);
805 Dbl_set_quiet(opnd2p1
);
806 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
810 * is third operand a signaling NaN?
812 else if (Dbl_is_signalingnan(opnd3p1
)) {
813 /* trap if INVALIDTRAP enabled */
814 if (Is_invalidtrap_enabled())
815 return(OPC_2E_INVALIDEXCEPTION
);
818 Dbl_set_quiet(opnd3p1
);
819 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
825 Dbl_copytoptr(opnd1p1
,opnd1p2
,dstptr
);
831 * check second operand for NaN's or infinity
833 if (Dbl_isinfinity_exponent(opnd2p1
)) {
834 if (Dbl_iszero_mantissa(opnd2p1
,opnd2p2
)) {
835 if (Dbl_isnotnan(opnd3p1
,opnd3p2
)) {
836 if (Dbl_iszero_exponentmantissa(opnd1p1
,opnd1p2
)) {
838 * invalid since multiply operands are
841 if (Is_invalidtrap_enabled())
842 return(OPC_2E_INVALIDEXCEPTION
);
844 Dbl_makequietnan(opnd2p1
,opnd2p2
);
845 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
850 * Check third operand for infinity with a
851 * sign opposite of the multiply result
853 if (Dbl_isinfinity(opnd3p1
,opnd3p2
) &&
854 (Dbl_sign(resultp1
) ^ Dbl_sign(opnd3p1
))) {
856 * invalid since attempting a magnitude
857 * subtraction of infinities
859 if (Is_invalidtrap_enabled())
860 return(OPC_2E_INVALIDEXCEPTION
);
862 Dbl_makequietnan(resultp1
,resultp2
);
863 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
870 Dbl_setinfinity_exponentmantissa(resultp1
,resultp2
);
871 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
877 * is NaN; signaling or quiet?
879 if (Dbl_isone_signaling(opnd2p1
)) {
880 /* trap if INVALIDTRAP enabled */
881 if (Is_invalidtrap_enabled())
882 return(OPC_2E_INVALIDEXCEPTION
);
885 Dbl_set_quiet(opnd2p1
);
888 * is third operand a signaling NaN?
890 else if (Dbl_is_signalingnan(opnd3p1
)) {
891 /* trap if INVALIDTRAP enabled */
892 if (Is_invalidtrap_enabled())
893 return(OPC_2E_INVALIDEXCEPTION
);
896 Dbl_set_quiet(opnd3p1
);
897 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
903 Dbl_copytoptr(opnd2p1
,opnd2p2
,dstptr
);
909 * check third operand for NaN's or infinity
911 if (Dbl_isinfinity_exponent(opnd3p1
)) {
912 if (Dbl_iszero_mantissa(opnd3p1
,opnd3p2
)) {
913 /* return infinity */
914 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
918 * is NaN; signaling or quiet?
920 if (Dbl_isone_signaling(opnd3p1
)) {
921 /* trap if INVALIDTRAP enabled */
922 if (Is_invalidtrap_enabled())
923 return(OPC_2E_INVALIDEXCEPTION
);
926 Dbl_set_quiet(opnd3p1
);
931 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
937 * Generate multiply mantissa
939 if (Dbl_isnotzero_exponent(opnd1p1
)) {
941 Dbl_clear_signexponent_set_hidden(opnd1p1
);
945 if (Dbl_iszero_mantissa(opnd1p1
,opnd1p2
)) {
947 * Perform the add opnd3 with zero here.
949 if (Dbl_iszero_exponentmantissa(opnd3p1
,opnd3p2
)) {
950 if (Is_rounding_mode(ROUNDMINUS
)) {
951 Dbl_or_signs(opnd3p1
,resultp1
);
953 Dbl_and_signs(opnd3p1
,resultp1
);
957 * Now let's check for trapped underflow case.
959 else if (Dbl_iszero_exponent(opnd3p1
) &&
960 Is_underflowtrap_enabled()) {
961 /* need to normalize results mantissa */
962 sign_save
= Dbl_signextendedsign(opnd3p1
);
964 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
965 Dbl_normalize(opnd3p1
,opnd3p2
,result_exponent
);
966 Dbl_set_sign(opnd3p1
,/*using*/sign_save
);
967 Dbl_setwrapped_exponent(opnd3p1
,result_exponent
,
969 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
970 /* inexact = FALSE */
971 return(OPC_2E_UNDERFLOWEXCEPTION
);
973 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
976 /* is denormalized, adjust exponent */
977 Dbl_clear_signexponent(opnd1p1
);
978 Dbl_leftshiftby1(opnd1p1
,opnd1p2
);
979 Dbl_normalize(opnd1p1
,opnd1p2
,mpy_exponent
);
981 /* opnd2 needs to have hidden bit set with msb in hidden bit */
982 if (Dbl_isnotzero_exponent(opnd2p1
)) {
983 Dbl_clear_signexponent_set_hidden(opnd2p1
);
987 if (Dbl_iszero_mantissa(opnd2p1
,opnd2p2
)) {
989 * Perform the add opnd3 with zero here.
991 if (Dbl_iszero_exponentmantissa(opnd3p1
,opnd3p2
)) {
992 if (Is_rounding_mode(ROUNDMINUS
)) {
993 Dbl_or_signs(opnd3p1
,resultp1
);
995 Dbl_and_signs(opnd3p1
,resultp1
);
999 * Now let's check for trapped underflow case.
1001 else if (Dbl_iszero_exponent(opnd3p1
) &&
1002 Is_underflowtrap_enabled()) {
1003 /* need to normalize results mantissa */
1004 sign_save
= Dbl_signextendedsign(opnd3p1
);
1005 result_exponent
= 0;
1006 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
1007 Dbl_normalize(opnd3p1
,opnd3p2
,result_exponent
);
1008 Dbl_set_sign(opnd3p1
,/*using*/sign_save
);
1009 Dbl_setwrapped_exponent(opnd3p1
,result_exponent
,
1011 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
1012 /* inexact = FALSE */
1013 return(OPC_2E_UNDERFLOWEXCEPTION
);
1015 Dbl_copytoptr(opnd3p1
,opnd3p2
,dstptr
);
1016 return(NOEXCEPTION
);
1018 /* is denormalized; want to normalize */
1019 Dbl_clear_signexponent(opnd2p1
);
1020 Dbl_leftshiftby1(opnd2p1
,opnd2p2
);
1021 Dbl_normalize(opnd2p1
,opnd2p2
,mpy_exponent
);
1024 /* Multiply the first two source mantissas together */
1027 * The intermediate result will be kept in tmpres,
1028 * which needs enough room for 106 bits of mantissa,
1029 * so lets call it a Double extended.
1031 Dblext_setzero(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
1034 * Four bits at a time are inspected in each loop, and a
1035 * simple shift and add multiply algorithm is used.
1037 for (count
= DBL_P
-1; count
>= 0; count
-= 4) {
1038 Dblext_rightshiftby4(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
1039 if (Dbit28p2(opnd1p2
)) {
1040 /* Fourword_add should be an ADD followed by 3 ADDC's */
1041 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
1042 opnd2p1
<<3 | opnd2p2
>>29, opnd2p2
<<3, 0, 0);
1044 if (Dbit29p2(opnd1p2
)) {
1045 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
1046 opnd2p1
<<2 | opnd2p2
>>30, opnd2p2
<<2, 0, 0);
1048 if (Dbit30p2(opnd1p2
)) {
1049 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
1050 opnd2p1
<<1 | opnd2p2
>>31, opnd2p2
<<1, 0, 0);
1052 if (Dbit31p2(opnd1p2
)) {
1053 Fourword_add(tmpresp1
, tmpresp2
, tmpresp3
, tmpresp4
,
1054 opnd2p1
, opnd2p2
, 0, 0);
1056 Dbl_rightshiftby4(opnd1p1
,opnd1p2
);
1058 if (Is_dexthiddenoverflow(tmpresp1
)) {
1059 /* result mantissa >= 2 (mantissa overflow) */
1061 Dblext_rightshiftby1(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
);
1065 * Restore the sign of the mpy result which was saved in resultp1.
1066 * The exponent will continue to be kept in mpy_exponent.
1068 Dblext_set_sign(tmpresp1
,Dbl_sign(resultp1
));
1071 * No rounding is required, since the result of the multiply
1072 * is exact in the extended format.
1076 * Now we are ready to perform the add portion of the operation.
1078 * The exponents need to be kept as integers for now, since the
1079 * multiply result might not fit into the exponent field. We
1080 * can't overflow or underflow because of this yet, since the
1081 * add could bring the final result back into range.
1083 add_exponent
= Dbl_exponent(opnd3p1
);
1086 * Check for denormalized or zero add operand.
1088 if (add_exponent
== 0) {
1089 /* check for zero */
1090 if (Dbl_iszero_mantissa(opnd3p1
,opnd3p2
)) {
1092 /* Left can't be zero and must be result.
1094 * The final result is now in tmpres and mpy_exponent,
1095 * and needs to be rounded and squeezed back into
1096 * double precision format from double extended.
1098 result_exponent
= mpy_exponent
;
1099 Dblext_copy(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
1100 resultp1
,resultp2
,resultp3
,resultp4
);
1101 sign_save
= Dbl_signextendedsign(resultp1
);/*save sign*/
1106 * Neither are zeroes.
1107 * Adjust exponent and normalize add operand.
1109 sign_save
= Dbl_signextendedsign(opnd3p1
); /* save sign */
1110 Dbl_clear_signexponent(opnd3p1
);
1111 Dbl_leftshiftby1(opnd3p1
,opnd3p2
);
1112 Dbl_normalize(opnd3p1
,opnd3p2
,add_exponent
);
1113 Dbl_set_sign(opnd3p1
,sign_save
); /* restore sign */
1115 Dbl_clear_exponent_set_hidden(opnd3p1
);
1118 * Copy opnd3 to the double extended variable called right.
1120 Dbl_copyto_dblext(opnd3p1
,opnd3p2
,rightp1
,rightp2
,rightp3
,rightp4
);
1123 * A zero "save" helps discover equal operands (for later),
1124 * and is used in swapping operands (if needed).
1126 Dblext_xortointp1(tmpresp1
,rightp1
,/*to*/save
);
1129 * Compare magnitude of operands.
1131 Dblext_copytoint_exponentmantissap1(tmpresp1
,signlessleft1
);
1132 Dblext_copytoint_exponentmantissap1(rightp1
,signlessright1
);
1133 if (mpy_exponent
< add_exponent
|| mpy_exponent
== add_exponent
&&
1134 Dblext_ismagnitudeless(tmpresp2
,rightp2
,signlessleft1
,signlessright1
)){
1136 * Set the left operand to the larger one by XOR swap.
1137 * First finish the first word "save".
1139 Dblext_xorfromintp1(save
,rightp1
,/*to*/rightp1
);
1140 Dblext_xorfromintp1(save
,tmpresp1
,/*to*/tmpresp1
);
1141 Dblext_swap_lower(tmpresp2
,tmpresp3
,tmpresp4
,
1142 rightp2
,rightp3
,rightp4
);
1143 /* also setup exponents used in rest of routine */
1144 diff_exponent
= add_exponent
- mpy_exponent
;
1145 result_exponent
= add_exponent
;
1147 /* also setup exponents used in rest of routine */
1148 diff_exponent
= mpy_exponent
- add_exponent
;
1149 result_exponent
= mpy_exponent
;
1151 /* Invariant: left is not smaller than right. */
1154 * Special case alignment of operands that would force alignment
1155 * beyond the extent of the extension. A further optimization
1156 * could special case this but only reduces the path length for
1157 * this infrequent case.
1159 if (diff_exponent
> DBLEXT_THRESHOLD
) {
1160 diff_exponent
= DBLEXT_THRESHOLD
;
1163 /* Align right operand by shifting it to the right */
1164 Dblext_clear_sign(rightp1
);
1165 Dblext_right_align(rightp1
,rightp2
,rightp3
,rightp4
,
1166 /*shifted by*/diff_exponent
);
1168 /* Treat sum and difference of the operands separately. */
1169 if ((int)save
< 0) {
1171 * Difference of the two operands. Overflow can occur if the
1172 * multiply overflowed. A borrow can occur out of the hidden
1173 * bit and force a post normalization phase.
1175 Dblext_subtract(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
1176 rightp1
,rightp2
,rightp3
,rightp4
,
1177 resultp1
,resultp2
,resultp3
,resultp4
);
1178 sign_save
= Dbl_signextendedsign(resultp1
);
1179 if (Dbl_iszero_hidden(resultp1
)) {
1180 /* Handle normalization */
1181 /* A straightforward algorithm would now shift the
1182 * result and extension left until the hidden bit
1183 * becomes one. Not all of the extension bits need
1184 * participate in the shift. Only the two most
1185 * significant bits (round and guard) are needed.
1186 * If only a single shift is needed then the guard
1187 * bit becomes a significant low order bit and the
1188 * extension must participate in the rounding.
1189 * If more than a single shift is needed, then all
1190 * bits to the right of the guard bit are zeros,
1191 * and the guard bit may or may not be zero. */
1192 Dblext_leftshiftby1(resultp1
,resultp2
,resultp3
,
1195 /* Need to check for a zero result. The sign and
1196 * exponent fields have already been zeroed. The more
1197 * efficient test of the full object can be used.
1199 if (Dblext_iszero(resultp1
,resultp2
,resultp3
,resultp4
)) {
1200 /* Must have been "x-x" or "x+(-x)". */
1201 if (Is_rounding_mode(ROUNDMINUS
))
1202 Dbl_setone_sign(resultp1
);
1203 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
1204 return(NOEXCEPTION
);
1208 /* Look to see if normalization is finished. */
1209 if (Dbl_isone_hidden(resultp1
)) {
1210 /* No further normalization is needed */
1214 /* Discover first one bit to determine shift amount.
1215 * Use a modified binary search. We have already
1216 * shifted the result one position right and still
1217 * not found a one so the remainder of the extension
1218 * must be zero and simplifies rounding. */
1220 while (Dbl_iszero_hiddenhigh7mantissa(resultp1
)) {
1221 Dblext_leftshiftby8(resultp1
,resultp2
,resultp3
,resultp4
);
1222 result_exponent
-= 8;
1224 /* Now narrow it down to the nibble */
1225 if (Dbl_iszero_hiddenhigh3mantissa(resultp1
)) {
1226 /* The lower nibble contains the
1227 * normalizing one */
1228 Dblext_leftshiftby4(resultp1
,resultp2
,resultp3
,resultp4
);
1229 result_exponent
-= 4;
1231 /* Select case where first bit is set (already
1232 * normalized) otherwise select the proper shift. */
1233 jumpsize
= Dbl_hiddenhigh3mantissa(resultp1
);
1234 if (jumpsize
<= 7) switch(jumpsize
) {
1236 Dblext_leftshiftby3(resultp1
,resultp2
,resultp3
,
1238 result_exponent
-= 3;
1242 Dblext_leftshiftby2(resultp1
,resultp2
,resultp3
,
1244 result_exponent
-= 2;
1250 Dblext_leftshiftby1(resultp1
,resultp2
,resultp3
,
1252 result_exponent
-= 1;
1255 } /* end if (hidden...)... */
1256 /* Fall through and round */
1257 } /* end if (save < 0)... */
1259 /* Add magnitudes */
1260 Dblext_addition(tmpresp1
,tmpresp2
,tmpresp3
,tmpresp4
,
1261 rightp1
,rightp2
,rightp3
,rightp4
,
1262 /*to*/resultp1
,resultp2
,resultp3
,resultp4
);
1263 sign_save
= Dbl_signextendedsign(resultp1
);
1264 if (Dbl_isone_hiddenoverflow(resultp1
)) {
1265 /* Prenormalization required. */
1266 Dblext_arithrightshiftby1(resultp1
,resultp2
,resultp3
,
1269 } /* end if hiddenoverflow... */
1270 } /* end else ...add magnitudes... */
1272 /* Round the result. If the extension and lower two words are
1273 * all zeros, then the result is exact. Otherwise round in the
1274 * correct direction. Underflow is possible. If a postnormalization
1275 * is necessary, then the mantissa is all zeros so no shift is needed.
1278 if (result_exponent
<= 0 && !Is_underflowtrap_enabled()) {
1279 Dblext_denormalize(resultp1
,resultp2
,resultp3
,resultp4
,
1280 result_exponent
,is_tiny
);
1282 Dbl_set_sign(resultp1
,/*using*/sign_save
);
1283 if (Dblext_isnotzero_mantissap3(resultp3
) ||
1284 Dblext_isnotzero_mantissap4(resultp4
)) {
1286 switch(Rounding_mode()) {
1287 case ROUNDNEAREST
: /* The default. */
1288 if (Dblext_isone_highp3(resultp3
)) {
1289 /* at least 1/2 ulp */
1290 if (Dblext_isnotzero_low31p3(resultp3
) ||
1291 Dblext_isnotzero_mantissap4(resultp4
) ||
1292 Dblext_isone_lowp2(resultp2
)) {
1293 /* either exactly half way and odd or
1294 * more than 1/2ulp */
1295 Dbl_increment(resultp1
,resultp2
);
1301 if (Dbl_iszero_sign(resultp1
)) {
1302 /* Round up positive results */
1303 Dbl_increment(resultp1
,resultp2
);
1308 if (Dbl_isone_sign(resultp1
)) {
1309 /* Round down negative results */
1310 Dbl_increment(resultp1
,resultp2
);
1314 /* truncate is simple */
1315 } /* end switch... */
1316 if (Dbl_isone_hiddenoverflow(resultp1
)) result_exponent
++;
1318 if (result_exponent
>= DBL_INFINITY_EXPONENT
) {
1320 if (Is_overflowtrap_enabled()) {
1322 * Adjust bias of result
1324 Dbl_setwrapped_exponent(resultp1
,result_exponent
,ovfl
);
1325 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
1327 if (Is_inexacttrap_enabled())
1328 return (OPC_2E_OVERFLOWEXCEPTION
|
1329 OPC_2E_INEXACTEXCEPTION
);
1330 else Set_inexactflag();
1331 return (OPC_2E_OVERFLOWEXCEPTION
);
1335 Dbl_setoverflow(resultp1
,resultp2
);
1336 } else if (result_exponent
<= 0) { /* underflow case */
1337 if (Is_underflowtrap_enabled()) {
1339 * Adjust bias of result
1341 Dbl_setwrapped_exponent(resultp1
,result_exponent
,unfl
);
1342 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
1344 if (Is_inexacttrap_enabled())
1345 return (OPC_2E_UNDERFLOWEXCEPTION
|
1346 OPC_2E_INEXACTEXCEPTION
);
1347 else Set_inexactflag();
1348 return(OPC_2E_UNDERFLOWEXCEPTION
);
1350 else if (inexact
&& is_tiny
) Set_underflowflag();
1352 else Dbl_set_exponent(resultp1
,result_exponent
);
1353 Dbl_copytoptr(resultp1
,resultp2
,dstptr
);
1355 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION
);
1356 else Set_inexactflag();
1357 return(NOEXCEPTION
);
1361 * Single Floating-point Multiply Fused Add
1364 sgl_fmpyfadd(src1ptr
,src2ptr
,src3ptr
,status
,dstptr
)
1366 sgl_floating_point
*src1ptr
, *src2ptr
, *src3ptr
, *dstptr
;
1367 unsigned int *status
;
1369 unsigned int opnd1
, opnd2
, opnd3
;
1370 register unsigned int tmpresp1
, tmpresp2
;
1371 unsigned int rightp1
, rightp2
;
1372 unsigned int resultp1
, resultp2
= 0;
1373 register int mpy_exponent
, add_exponent
, count
;
1374 boolean inexact
= FALSE
, is_tiny
= FALSE
;
1376 unsigned int signlessleft1
, signlessright1
, save
;
1377 register int result_exponent
, diff_exponent
;
1378 int sign_save
, jumpsize
;
1380 Sgl_copyfromptr(src1ptr
,opnd1
);
1381 Sgl_copyfromptr(src2ptr
,opnd2
);
1382 Sgl_copyfromptr(src3ptr
,opnd3
);
1385 * set sign bit of result of multiply
1387 if (Sgl_sign(opnd1
) ^ Sgl_sign(opnd2
))
1388 Sgl_setnegativezero(resultp1
);
1389 else Sgl_setzero(resultp1
);
1392 * Generate multiply exponent
1394 mpy_exponent
= Sgl_exponent(opnd1
) + Sgl_exponent(opnd2
) - SGL_BIAS
;
1397 * check first operand for NaN's or infinity
1399 if (Sgl_isinfinity_exponent(opnd1
)) {
1400 if (Sgl_iszero_mantissa(opnd1
)) {
1401 if (Sgl_isnotnan(opnd2
) && Sgl_isnotnan(opnd3
)) {
1402 if (Sgl_iszero_exponentmantissa(opnd2
)) {
1404 * invalid since operands are infinity
1407 if (Is_invalidtrap_enabled())
1408 return(OPC_2E_INVALIDEXCEPTION
);
1410 Sgl_makequietnan(resultp1
);
1411 Sgl_copytoptr(resultp1
,dstptr
);
1412 return(NOEXCEPTION
);
1415 * Check third operand for infinity with a
1416 * sign opposite of the multiply result
1418 if (Sgl_isinfinity(opnd3
) &&
1419 (Sgl_sign(resultp1
) ^ Sgl_sign(opnd3
))) {
1421 * invalid since attempting a magnitude
1422 * subtraction of infinities
1424 if (Is_invalidtrap_enabled())
1425 return(OPC_2E_INVALIDEXCEPTION
);
1427 Sgl_makequietnan(resultp1
);
1428 Sgl_copytoptr(resultp1
,dstptr
);
1429 return(NOEXCEPTION
);
1435 Sgl_setinfinity_exponentmantissa(resultp1
);
1436 Sgl_copytoptr(resultp1
,dstptr
);
1437 return(NOEXCEPTION
);
1442 * is NaN; signaling or quiet?
1444 if (Sgl_isone_signaling(opnd1
)) {
1445 /* trap if INVALIDTRAP enabled */
1446 if (Is_invalidtrap_enabled())
1447 return(OPC_2E_INVALIDEXCEPTION
);
1448 /* make NaN quiet */
1450 Sgl_set_quiet(opnd1
);
1453 * is second operand a signaling NaN?
1455 else if (Sgl_is_signalingnan(opnd2
)) {
1456 /* trap if INVALIDTRAP enabled */
1457 if (Is_invalidtrap_enabled())
1458 return(OPC_2E_INVALIDEXCEPTION
);
1459 /* make NaN quiet */
1461 Sgl_set_quiet(opnd2
);
1462 Sgl_copytoptr(opnd2
,dstptr
);
1463 return(NOEXCEPTION
);
1466 * is third operand a signaling NaN?
1468 else if (Sgl_is_signalingnan(opnd3
)) {
1469 /* trap if INVALIDTRAP enabled */
1470 if (Is_invalidtrap_enabled())
1471 return(OPC_2E_INVALIDEXCEPTION
);
1472 /* make NaN quiet */
1474 Sgl_set_quiet(opnd3
);
1475 Sgl_copytoptr(opnd3
,dstptr
);
1476 return(NOEXCEPTION
);
1481 Sgl_copytoptr(opnd1
,dstptr
);
1482 return(NOEXCEPTION
);
1487 * check second operand for NaN's or infinity
1489 if (Sgl_isinfinity_exponent(opnd2
)) {
1490 if (Sgl_iszero_mantissa(opnd2
)) {
1491 if (Sgl_isnotnan(opnd3
)) {
1492 if (Sgl_iszero_exponentmantissa(opnd1
)) {
1494 * invalid since multiply operands are
1497 if (Is_invalidtrap_enabled())
1498 return(OPC_2E_INVALIDEXCEPTION
);
1500 Sgl_makequietnan(opnd2
);
1501 Sgl_copytoptr(opnd2
,dstptr
);
1502 return(NOEXCEPTION
);
1506 * Check third operand for infinity with a
1507 * sign opposite of the multiply result
1509 if (Sgl_isinfinity(opnd3
) &&
1510 (Sgl_sign(resultp1
) ^ Sgl_sign(opnd3
))) {
1512 * invalid since attempting a magnitude
1513 * subtraction of infinities
1515 if (Is_invalidtrap_enabled())
1516 return(OPC_2E_INVALIDEXCEPTION
);
1518 Sgl_makequietnan(resultp1
);
1519 Sgl_copytoptr(resultp1
,dstptr
);
1520 return(NOEXCEPTION
);
1526 Sgl_setinfinity_exponentmantissa(resultp1
);
1527 Sgl_copytoptr(resultp1
,dstptr
);
1528 return(NOEXCEPTION
);
1533 * is NaN; signaling or quiet?
1535 if (Sgl_isone_signaling(opnd2
)) {
1536 /* trap if INVALIDTRAP enabled */
1537 if (Is_invalidtrap_enabled())
1538 return(OPC_2E_INVALIDEXCEPTION
);
1539 /* make NaN quiet */
1541 Sgl_set_quiet(opnd2
);
1544 * is third operand a signaling NaN?
1546 else if (Sgl_is_signalingnan(opnd3
)) {
1547 /* trap if INVALIDTRAP enabled */
1548 if (Is_invalidtrap_enabled())
1549 return(OPC_2E_INVALIDEXCEPTION
);
1550 /* make NaN quiet */
1552 Sgl_set_quiet(opnd3
);
1553 Sgl_copytoptr(opnd3
,dstptr
);
1554 return(NOEXCEPTION
);
1559 Sgl_copytoptr(opnd2
,dstptr
);
1560 return(NOEXCEPTION
);
1565 * check third operand for NaN's or infinity
1567 if (Sgl_isinfinity_exponent(opnd3
)) {
1568 if (Sgl_iszero_mantissa(opnd3
)) {
1569 /* return infinity */
1570 Sgl_copytoptr(opnd3
,dstptr
);
1571 return(NOEXCEPTION
);
1574 * is NaN; signaling or quiet?
1576 if (Sgl_isone_signaling(opnd3
)) {
1577 /* trap if INVALIDTRAP enabled */
1578 if (Is_invalidtrap_enabled())
1579 return(OPC_2E_INVALIDEXCEPTION
);
1580 /* make NaN quiet */
1582 Sgl_set_quiet(opnd3
);
1587 Sgl_copytoptr(opnd3
,dstptr
);
1588 return(NOEXCEPTION
);
1593 * Generate multiply mantissa
1595 if (Sgl_isnotzero_exponent(opnd1
)) {
1596 /* set hidden bit */
1597 Sgl_clear_signexponent_set_hidden(opnd1
);
1600 /* check for zero */
1601 if (Sgl_iszero_mantissa(opnd1
)) {
1603 * Perform the add opnd3 with zero here.
1605 if (Sgl_iszero_exponentmantissa(opnd3
)) {
1606 if (Is_rounding_mode(ROUNDMINUS
)) {
1607 Sgl_or_signs(opnd3
,resultp1
);
1609 Sgl_and_signs(opnd3
,resultp1
);
1613 * Now let's check for trapped underflow case.
1615 else if (Sgl_iszero_exponent(opnd3
) &&
1616 Is_underflowtrap_enabled()) {
1617 /* need to normalize results mantissa */
1618 sign_save
= Sgl_signextendedsign(opnd3
);
1619 result_exponent
= 0;
1620 Sgl_leftshiftby1(opnd3
);
1621 Sgl_normalize(opnd3
,result_exponent
);
1622 Sgl_set_sign(opnd3
,/*using*/sign_save
);
1623 Sgl_setwrapped_exponent(opnd3
,result_exponent
,
1625 Sgl_copytoptr(opnd3
,dstptr
);
1626 /* inexact = FALSE */
1627 return(OPC_2E_UNDERFLOWEXCEPTION
);
1629 Sgl_copytoptr(opnd3
,dstptr
);
1630 return(NOEXCEPTION
);
1632 /* is denormalized, adjust exponent */
1633 Sgl_clear_signexponent(opnd1
);
1634 Sgl_leftshiftby1(opnd1
);
1635 Sgl_normalize(opnd1
,mpy_exponent
);
1637 /* opnd2 needs to have hidden bit set with msb in hidden bit */
1638 if (Sgl_isnotzero_exponent(opnd2
)) {
1639 Sgl_clear_signexponent_set_hidden(opnd2
);
1642 /* check for zero */
1643 if (Sgl_iszero_mantissa(opnd2
)) {
1645 * Perform the add opnd3 with zero here.
1647 if (Sgl_iszero_exponentmantissa(opnd3
)) {
1648 if (Is_rounding_mode(ROUNDMINUS
)) {
1649 Sgl_or_signs(opnd3
,resultp1
);
1651 Sgl_and_signs(opnd3
,resultp1
);
1655 * Now let's check for trapped underflow case.
1657 else if (Sgl_iszero_exponent(opnd3
) &&
1658 Is_underflowtrap_enabled()) {
1659 /* need to normalize results mantissa */
1660 sign_save
= Sgl_signextendedsign(opnd3
);
1661 result_exponent
= 0;
1662 Sgl_leftshiftby1(opnd3
);
1663 Sgl_normalize(opnd3
,result_exponent
);
1664 Sgl_set_sign(opnd3
,/*using*/sign_save
);
1665 Sgl_setwrapped_exponent(opnd3
,result_exponent
,
1667 Sgl_copytoptr(opnd3
,dstptr
);
1668 /* inexact = FALSE */
1669 return(OPC_2E_UNDERFLOWEXCEPTION
);
1671 Sgl_copytoptr(opnd3
,dstptr
);
1672 return(NOEXCEPTION
);
1674 /* is denormalized; want to normalize */
1675 Sgl_clear_signexponent(opnd2
);
1676 Sgl_leftshiftby1(opnd2
);
1677 Sgl_normalize(opnd2
,mpy_exponent
);
1680 /* Multiply the first two source mantissas together */
1683 * The intermediate result will be kept in tmpres,
1684 * which needs enough room for 106 bits of mantissa,
1685 * so lets call it a Double extended.
1687 Sglext_setzero(tmpresp1
,tmpresp2
);
1690 * Four bits at a time are inspected in each loop, and a
1691 * simple shift and add multiply algorithm is used.
1693 for (count
= SGL_P
-1; count
>= 0; count
-= 4) {
1694 Sglext_rightshiftby4(tmpresp1
,tmpresp2
);
1695 if (Sbit28(opnd1
)) {
1696 /* Twoword_add should be an ADD followed by 2 ADDC's */
1697 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<3, 0);
1699 if (Sbit29(opnd1
)) {
1700 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<2, 0);
1702 if (Sbit30(opnd1
)) {
1703 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<1, 0);
1705 if (Sbit31(opnd1
)) {
1706 Twoword_add(tmpresp1
, tmpresp2
, opnd2
, 0);
1708 Sgl_rightshiftby4(opnd1
);
1710 if (Is_sexthiddenoverflow(tmpresp1
)) {
1711 /* result mantissa >= 2 (mantissa overflow) */
1713 Sglext_rightshiftby4(tmpresp1
,tmpresp2
);
1715 Sglext_rightshiftby3(tmpresp1
,tmpresp2
);
1719 * Restore the sign of the mpy result which was saved in resultp1.
1720 * The exponent will continue to be kept in mpy_exponent.
1722 Sglext_set_sign(tmpresp1
,Sgl_sign(resultp1
));
1725 * No rounding is required, since the result of the multiply
1726 * is exact in the extended format.
1730 * Now we are ready to perform the add portion of the operation.
1732 * The exponents need to be kept as integers for now, since the
1733 * multiply result might not fit into the exponent field. We
1734 * can't overflow or underflow because of this yet, since the
1735 * add could bring the final result back into range.
1737 add_exponent
= Sgl_exponent(opnd3
);
1740 * Check for denormalized or zero add operand.
1742 if (add_exponent
== 0) {
1743 /* check for zero */
1744 if (Sgl_iszero_mantissa(opnd3
)) {
1746 /* Left can't be zero and must be result.
1748 * The final result is now in tmpres and mpy_exponent,
1749 * and needs to be rounded and squeezed back into
1750 * double precision format from double extended.
1752 result_exponent
= mpy_exponent
;
1753 Sglext_copy(tmpresp1
,tmpresp2
,resultp1
,resultp2
);
1754 sign_save
= Sgl_signextendedsign(resultp1
);/*save sign*/
1759 * Neither are zeroes.
1760 * Adjust exponent and normalize add operand.
1762 sign_save
= Sgl_signextendedsign(opnd3
); /* save sign */
1763 Sgl_clear_signexponent(opnd3
);
1764 Sgl_leftshiftby1(opnd3
);
1765 Sgl_normalize(opnd3
,add_exponent
);
1766 Sgl_set_sign(opnd3
,sign_save
); /* restore sign */
1768 Sgl_clear_exponent_set_hidden(opnd3
);
1771 * Copy opnd3 to the double extended variable called right.
1773 Sgl_copyto_sglext(opnd3
,rightp1
,rightp2
);
1776 * A zero "save" helps discover equal operands (for later),
1777 * and is used in swapping operands (if needed).
1779 Sglext_xortointp1(tmpresp1
,rightp1
,/*to*/save
);
1782 * Compare magnitude of operands.
1784 Sglext_copytoint_exponentmantissa(tmpresp1
,signlessleft1
);
1785 Sglext_copytoint_exponentmantissa(rightp1
,signlessright1
);
1786 if (mpy_exponent
< add_exponent
|| mpy_exponent
== add_exponent
&&
1787 Sglext_ismagnitudeless(signlessleft1
,signlessright1
)) {
1789 * Set the left operand to the larger one by XOR swap.
1790 * First finish the first word "save".
1792 Sglext_xorfromintp1(save
,rightp1
,/*to*/rightp1
);
1793 Sglext_xorfromintp1(save
,tmpresp1
,/*to*/tmpresp1
);
1794 Sglext_swap_lower(tmpresp2
,rightp2
);
1795 /* also setup exponents used in rest of routine */
1796 diff_exponent
= add_exponent
- mpy_exponent
;
1797 result_exponent
= add_exponent
;
1799 /* also setup exponents used in rest of routine */
1800 diff_exponent
= mpy_exponent
- add_exponent
;
1801 result_exponent
= mpy_exponent
;
1803 /* Invariant: left is not smaller than right. */
1806 * Special case alignment of operands that would force alignment
1807 * beyond the extent of the extension. A further optimization
1808 * could special case this but only reduces the path length for
1809 * this infrequent case.
1811 if (diff_exponent
> SGLEXT_THRESHOLD
) {
1812 diff_exponent
= SGLEXT_THRESHOLD
;
1815 /* Align right operand by shifting it to the right */
1816 Sglext_clear_sign(rightp1
);
1817 Sglext_right_align(rightp1
,rightp2
,/*shifted by*/diff_exponent
);
1819 /* Treat sum and difference of the operands separately. */
1820 if ((int)save
< 0) {
1822 * Difference of the two operands. Overflow can occur if the
1823 * multiply overflowed. A borrow can occur out of the hidden
1824 * bit and force a post normalization phase.
1826 Sglext_subtract(tmpresp1
,tmpresp2
, rightp1
,rightp2
,
1828 sign_save
= Sgl_signextendedsign(resultp1
);
1829 if (Sgl_iszero_hidden(resultp1
)) {
1830 /* Handle normalization */
1831 /* A straightforward algorithm would now shift the
1832 * result and extension left until the hidden bit
1833 * becomes one. Not all of the extension bits need
1834 * participate in the shift. Only the two most
1835 * significant bits (round and guard) are needed.
1836 * If only a single shift is needed then the guard
1837 * bit becomes a significant low order bit and the
1838 * extension must participate in the rounding.
1839 * If more than a single shift is needed, then all
1840 * bits to the right of the guard bit are zeros,
1841 * and the guard bit may or may not be zero. */
1842 Sglext_leftshiftby1(resultp1
,resultp2
);
1844 /* Need to check for a zero result. The sign and
1845 * exponent fields have already been zeroed. The more
1846 * efficient test of the full object can be used.
1848 if (Sglext_iszero(resultp1
,resultp2
)) {
1849 /* Must have been "x-x" or "x+(-x)". */
1850 if (Is_rounding_mode(ROUNDMINUS
))
1851 Sgl_setone_sign(resultp1
);
1852 Sgl_copytoptr(resultp1
,dstptr
);
1853 return(NOEXCEPTION
);
1857 /* Look to see if normalization is finished. */
1858 if (Sgl_isone_hidden(resultp1
)) {
1859 /* No further normalization is needed */
1863 /* Discover first one bit to determine shift amount.
1864 * Use a modified binary search. We have already
1865 * shifted the result one position right and still
1866 * not found a one so the remainder of the extension
1867 * must be zero and simplifies rounding. */
1869 while (Sgl_iszero_hiddenhigh7mantissa(resultp1
)) {
1870 Sglext_leftshiftby8(resultp1
,resultp2
);
1871 result_exponent
-= 8;
1873 /* Now narrow it down to the nibble */
1874 if (Sgl_iszero_hiddenhigh3mantissa(resultp1
)) {
1875 /* The lower nibble contains the
1876 * normalizing one */
1877 Sglext_leftshiftby4(resultp1
,resultp2
);
1878 result_exponent
-= 4;
1880 /* Select case where first bit is set (already
1881 * normalized) otherwise select the proper shift. */
1882 jumpsize
= Sgl_hiddenhigh3mantissa(resultp1
);
1883 if (jumpsize
<= 7) switch(jumpsize
) {
1885 Sglext_leftshiftby3(resultp1
,resultp2
);
1886 result_exponent
-= 3;
1890 Sglext_leftshiftby2(resultp1
,resultp2
);
1891 result_exponent
-= 2;
1897 Sglext_leftshiftby1(resultp1
,resultp2
);
1898 result_exponent
-= 1;
1901 } /* end if (hidden...)... */
1902 /* Fall through and round */
1903 } /* end if (save < 0)... */
1905 /* Add magnitudes */
1906 Sglext_addition(tmpresp1
,tmpresp2
,
1907 rightp1
,rightp2
, /*to*/resultp1
,resultp2
);
1908 sign_save
= Sgl_signextendedsign(resultp1
);
1909 if (Sgl_isone_hiddenoverflow(resultp1
)) {
1910 /* Prenormalization required. */
1911 Sglext_arithrightshiftby1(resultp1
,resultp2
);
1913 } /* end if hiddenoverflow... */
1914 } /* end else ...add magnitudes... */
1916 /* Round the result. If the extension and lower two words are
1917 * all zeros, then the result is exact. Otherwise round in the
1918 * correct direction. Underflow is possible. If a postnormalization
1919 * is necessary, then the mantissa is all zeros so no shift is needed.
1922 if (result_exponent
<= 0 && !Is_underflowtrap_enabled()) {
1923 Sglext_denormalize(resultp1
,resultp2
,result_exponent
,is_tiny
);
1925 Sgl_set_sign(resultp1
,/*using*/sign_save
);
1926 if (Sglext_isnotzero_mantissap2(resultp2
)) {
1928 switch(Rounding_mode()) {
1929 case ROUNDNEAREST
: /* The default. */
1930 if (Sglext_isone_highp2(resultp2
)) {
1931 /* at least 1/2 ulp */
1932 if (Sglext_isnotzero_low31p2(resultp2
) ||
1933 Sglext_isone_lowp1(resultp1
)) {
1934 /* either exactly half way and odd or
1935 * more than 1/2ulp */
1936 Sgl_increment(resultp1
);
1942 if (Sgl_iszero_sign(resultp1
)) {
1943 /* Round up positive results */
1944 Sgl_increment(resultp1
);
1949 if (Sgl_isone_sign(resultp1
)) {
1950 /* Round down negative results */
1951 Sgl_increment(resultp1
);
1955 /* truncate is simple */
1956 } /* end switch... */
1957 if (Sgl_isone_hiddenoverflow(resultp1
)) result_exponent
++;
1959 if (result_exponent
>= SGL_INFINITY_EXPONENT
) {
1961 if (Is_overflowtrap_enabled()) {
1963 * Adjust bias of result
1965 Sgl_setwrapped_exponent(resultp1
,result_exponent
,ovfl
);
1966 Sgl_copytoptr(resultp1
,dstptr
);
1968 if (Is_inexacttrap_enabled())
1969 return (OPC_2E_OVERFLOWEXCEPTION
|
1970 OPC_2E_INEXACTEXCEPTION
);
1971 else Set_inexactflag();
1972 return (OPC_2E_OVERFLOWEXCEPTION
);
1976 Sgl_setoverflow(resultp1
);
1977 } else if (result_exponent
<= 0) { /* underflow case */
1978 if (Is_underflowtrap_enabled()) {
1980 * Adjust bias of result
1982 Sgl_setwrapped_exponent(resultp1
,result_exponent
,unfl
);
1983 Sgl_copytoptr(resultp1
,dstptr
);
1985 if (Is_inexacttrap_enabled())
1986 return (OPC_2E_UNDERFLOWEXCEPTION
|
1987 OPC_2E_INEXACTEXCEPTION
);
1988 else Set_inexactflag();
1989 return(OPC_2E_UNDERFLOWEXCEPTION
);
1991 else if (inexact
&& is_tiny
) Set_underflowflag();
1993 else Sgl_set_exponent(resultp1
,result_exponent
);
1994 Sgl_copytoptr(resultp1
,dstptr
);
1996 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION
);
1997 else Set_inexactflag();
1998 return(NOEXCEPTION
);
2002 * Single Floating-point Multiply Negate Fused Add
2005 sgl_fmpynfadd(src1ptr
,src2ptr
,src3ptr
,status
,dstptr
)
2007 sgl_floating_point
*src1ptr
, *src2ptr
, *src3ptr
, *dstptr
;
2008 unsigned int *status
;
2010 unsigned int opnd1
, opnd2
, opnd3
;
2011 register unsigned int tmpresp1
, tmpresp2
;
2012 unsigned int rightp1
, rightp2
;
2013 unsigned int resultp1
, resultp2
= 0;
2014 register int mpy_exponent
, add_exponent
, count
;
2015 boolean inexact
= FALSE
, is_tiny
= FALSE
;
2017 unsigned int signlessleft1
, signlessright1
, save
;
2018 register int result_exponent
, diff_exponent
;
2019 int sign_save
, jumpsize
;
2021 Sgl_copyfromptr(src1ptr
,opnd1
);
2022 Sgl_copyfromptr(src2ptr
,opnd2
);
2023 Sgl_copyfromptr(src3ptr
,opnd3
);
2026 * set sign bit of result of multiply
2028 if (Sgl_sign(opnd1
) ^ Sgl_sign(opnd2
))
2029 Sgl_setzero(resultp1
);
2031 Sgl_setnegativezero(resultp1
);
2034 * Generate multiply exponent
2036 mpy_exponent
= Sgl_exponent(opnd1
) + Sgl_exponent(opnd2
) - SGL_BIAS
;
2039 * check first operand for NaN's or infinity
2041 if (Sgl_isinfinity_exponent(opnd1
)) {
2042 if (Sgl_iszero_mantissa(opnd1
)) {
2043 if (Sgl_isnotnan(opnd2
) && Sgl_isnotnan(opnd3
)) {
2044 if (Sgl_iszero_exponentmantissa(opnd2
)) {
2046 * invalid since operands are infinity
2049 if (Is_invalidtrap_enabled())
2050 return(OPC_2E_INVALIDEXCEPTION
);
2052 Sgl_makequietnan(resultp1
);
2053 Sgl_copytoptr(resultp1
,dstptr
);
2054 return(NOEXCEPTION
);
2057 * Check third operand for infinity with a
2058 * sign opposite of the multiply result
2060 if (Sgl_isinfinity(opnd3
) &&
2061 (Sgl_sign(resultp1
) ^ Sgl_sign(opnd3
))) {
2063 * invalid since attempting a magnitude
2064 * subtraction of infinities
2066 if (Is_invalidtrap_enabled())
2067 return(OPC_2E_INVALIDEXCEPTION
);
2069 Sgl_makequietnan(resultp1
);
2070 Sgl_copytoptr(resultp1
,dstptr
);
2071 return(NOEXCEPTION
);
2077 Sgl_setinfinity_exponentmantissa(resultp1
);
2078 Sgl_copytoptr(resultp1
,dstptr
);
2079 return(NOEXCEPTION
);
2084 * is NaN; signaling or quiet?
2086 if (Sgl_isone_signaling(opnd1
)) {
2087 /* trap if INVALIDTRAP enabled */
2088 if (Is_invalidtrap_enabled())
2089 return(OPC_2E_INVALIDEXCEPTION
);
2090 /* make NaN quiet */
2092 Sgl_set_quiet(opnd1
);
2095 * is second operand a signaling NaN?
2097 else if (Sgl_is_signalingnan(opnd2
)) {
2098 /* trap if INVALIDTRAP enabled */
2099 if (Is_invalidtrap_enabled())
2100 return(OPC_2E_INVALIDEXCEPTION
);
2101 /* make NaN quiet */
2103 Sgl_set_quiet(opnd2
);
2104 Sgl_copytoptr(opnd2
,dstptr
);
2105 return(NOEXCEPTION
);
2108 * is third operand a signaling NaN?
2110 else if (Sgl_is_signalingnan(opnd3
)) {
2111 /* trap if INVALIDTRAP enabled */
2112 if (Is_invalidtrap_enabled())
2113 return(OPC_2E_INVALIDEXCEPTION
);
2114 /* make NaN quiet */
2116 Sgl_set_quiet(opnd3
);
2117 Sgl_copytoptr(opnd3
,dstptr
);
2118 return(NOEXCEPTION
);
2123 Sgl_copytoptr(opnd1
,dstptr
);
2124 return(NOEXCEPTION
);
2129 * check second operand for NaN's or infinity
2131 if (Sgl_isinfinity_exponent(opnd2
)) {
2132 if (Sgl_iszero_mantissa(opnd2
)) {
2133 if (Sgl_isnotnan(opnd3
)) {
2134 if (Sgl_iszero_exponentmantissa(opnd1
)) {
2136 * invalid since multiply operands are
2139 if (Is_invalidtrap_enabled())
2140 return(OPC_2E_INVALIDEXCEPTION
);
2142 Sgl_makequietnan(opnd2
);
2143 Sgl_copytoptr(opnd2
,dstptr
);
2144 return(NOEXCEPTION
);
2148 * Check third operand for infinity with a
2149 * sign opposite of the multiply result
2151 if (Sgl_isinfinity(opnd3
) &&
2152 (Sgl_sign(resultp1
) ^ Sgl_sign(opnd3
))) {
2154 * invalid since attempting a magnitude
2155 * subtraction of infinities
2157 if (Is_invalidtrap_enabled())
2158 return(OPC_2E_INVALIDEXCEPTION
);
2160 Sgl_makequietnan(resultp1
);
2161 Sgl_copytoptr(resultp1
,dstptr
);
2162 return(NOEXCEPTION
);
2168 Sgl_setinfinity_exponentmantissa(resultp1
);
2169 Sgl_copytoptr(resultp1
,dstptr
);
2170 return(NOEXCEPTION
);
2175 * is NaN; signaling or quiet?
2177 if (Sgl_isone_signaling(opnd2
)) {
2178 /* trap if INVALIDTRAP enabled */
2179 if (Is_invalidtrap_enabled())
2180 return(OPC_2E_INVALIDEXCEPTION
);
2181 /* make NaN quiet */
2183 Sgl_set_quiet(opnd2
);
2186 * is third operand a signaling NaN?
2188 else if (Sgl_is_signalingnan(opnd3
)) {
2189 /* trap if INVALIDTRAP enabled */
2190 if (Is_invalidtrap_enabled())
2191 return(OPC_2E_INVALIDEXCEPTION
);
2192 /* make NaN quiet */
2194 Sgl_set_quiet(opnd3
);
2195 Sgl_copytoptr(opnd3
,dstptr
);
2196 return(NOEXCEPTION
);
2201 Sgl_copytoptr(opnd2
,dstptr
);
2202 return(NOEXCEPTION
);
2207 * check third operand for NaN's or infinity
2209 if (Sgl_isinfinity_exponent(opnd3
)) {
2210 if (Sgl_iszero_mantissa(opnd3
)) {
2211 /* return infinity */
2212 Sgl_copytoptr(opnd3
,dstptr
);
2213 return(NOEXCEPTION
);
2216 * is NaN; signaling or quiet?
2218 if (Sgl_isone_signaling(opnd3
)) {
2219 /* trap if INVALIDTRAP enabled */
2220 if (Is_invalidtrap_enabled())
2221 return(OPC_2E_INVALIDEXCEPTION
);
2222 /* make NaN quiet */
2224 Sgl_set_quiet(opnd3
);
2229 Sgl_copytoptr(opnd3
,dstptr
);
2230 return(NOEXCEPTION
);
2235 * Generate multiply mantissa
2237 if (Sgl_isnotzero_exponent(opnd1
)) {
2238 /* set hidden bit */
2239 Sgl_clear_signexponent_set_hidden(opnd1
);
2242 /* check for zero */
2243 if (Sgl_iszero_mantissa(opnd1
)) {
2245 * Perform the add opnd3 with zero here.
2247 if (Sgl_iszero_exponentmantissa(opnd3
)) {
2248 if (Is_rounding_mode(ROUNDMINUS
)) {
2249 Sgl_or_signs(opnd3
,resultp1
);
2251 Sgl_and_signs(opnd3
,resultp1
);
2255 * Now let's check for trapped underflow case.
2257 else if (Sgl_iszero_exponent(opnd3
) &&
2258 Is_underflowtrap_enabled()) {
2259 /* need to normalize results mantissa */
2260 sign_save
= Sgl_signextendedsign(opnd3
);
2261 result_exponent
= 0;
2262 Sgl_leftshiftby1(opnd3
);
2263 Sgl_normalize(opnd3
,result_exponent
);
2264 Sgl_set_sign(opnd3
,/*using*/sign_save
);
2265 Sgl_setwrapped_exponent(opnd3
,result_exponent
,
2267 Sgl_copytoptr(opnd3
,dstptr
);
2268 /* inexact = FALSE */
2269 return(OPC_2E_UNDERFLOWEXCEPTION
);
2271 Sgl_copytoptr(opnd3
,dstptr
);
2272 return(NOEXCEPTION
);
2274 /* is denormalized, adjust exponent */
2275 Sgl_clear_signexponent(opnd1
);
2276 Sgl_leftshiftby1(opnd1
);
2277 Sgl_normalize(opnd1
,mpy_exponent
);
2279 /* opnd2 needs to have hidden bit set with msb in hidden bit */
2280 if (Sgl_isnotzero_exponent(opnd2
)) {
2281 Sgl_clear_signexponent_set_hidden(opnd2
);
2284 /* check for zero */
2285 if (Sgl_iszero_mantissa(opnd2
)) {
2287 * Perform the add opnd3 with zero here.
2289 if (Sgl_iszero_exponentmantissa(opnd3
)) {
2290 if (Is_rounding_mode(ROUNDMINUS
)) {
2291 Sgl_or_signs(opnd3
,resultp1
);
2293 Sgl_and_signs(opnd3
,resultp1
);
2297 * Now let's check for trapped underflow case.
2299 else if (Sgl_iszero_exponent(opnd3
) &&
2300 Is_underflowtrap_enabled()) {
2301 /* need to normalize results mantissa */
2302 sign_save
= Sgl_signextendedsign(opnd3
);
2303 result_exponent
= 0;
2304 Sgl_leftshiftby1(opnd3
);
2305 Sgl_normalize(opnd3
,result_exponent
);
2306 Sgl_set_sign(opnd3
,/*using*/sign_save
);
2307 Sgl_setwrapped_exponent(opnd3
,result_exponent
,
2309 Sgl_copytoptr(opnd3
,dstptr
);
2310 /* inexact = FALSE */
2311 return(OPC_2E_UNDERFLOWEXCEPTION
);
2313 Sgl_copytoptr(opnd3
,dstptr
);
2314 return(NOEXCEPTION
);
2316 /* is denormalized; want to normalize */
2317 Sgl_clear_signexponent(opnd2
);
2318 Sgl_leftshiftby1(opnd2
);
2319 Sgl_normalize(opnd2
,mpy_exponent
);
2322 /* Multiply the first two source mantissas together */
2325 * The intermediate result will be kept in tmpres,
2326 * which needs enough room for 106 bits of mantissa,
2327 * so lets call it a Double extended.
2329 Sglext_setzero(tmpresp1
,tmpresp2
);
2332 * Four bits at a time are inspected in each loop, and a
2333 * simple shift and add multiply algorithm is used.
2335 for (count
= SGL_P
-1; count
>= 0; count
-= 4) {
2336 Sglext_rightshiftby4(tmpresp1
,tmpresp2
);
2337 if (Sbit28(opnd1
)) {
2338 /* Twoword_add should be an ADD followed by 2 ADDC's */
2339 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<3, 0);
2341 if (Sbit29(opnd1
)) {
2342 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<2, 0);
2344 if (Sbit30(opnd1
)) {
2345 Twoword_add(tmpresp1
, tmpresp2
, opnd2
<<1, 0);
2347 if (Sbit31(opnd1
)) {
2348 Twoword_add(tmpresp1
, tmpresp2
, opnd2
, 0);
2350 Sgl_rightshiftby4(opnd1
);
2352 if (Is_sexthiddenoverflow(tmpresp1
)) {
2353 /* result mantissa >= 2 (mantissa overflow) */
2355 Sglext_rightshiftby4(tmpresp1
,tmpresp2
);
2357 Sglext_rightshiftby3(tmpresp1
,tmpresp2
);
2361 * Restore the sign of the mpy result which was saved in resultp1.
2362 * The exponent will continue to be kept in mpy_exponent.
2364 Sglext_set_sign(tmpresp1
,Sgl_sign(resultp1
));
2367 * No rounding is required, since the result of the multiply
2368 * is exact in the extended format.
2372 * Now we are ready to perform the add portion of the operation.
2374 * The exponents need to be kept as integers for now, since the
2375 * multiply result might not fit into the exponent field. We
2376 * can't overflow or underflow because of this yet, since the
2377 * add could bring the final result back into range.
2379 add_exponent
= Sgl_exponent(opnd3
);
2382 * Check for denormalized or zero add operand.
2384 if (add_exponent
== 0) {
2385 /* check for zero */
2386 if (Sgl_iszero_mantissa(opnd3
)) {
2388 /* Left can't be zero and must be result.
2390 * The final result is now in tmpres and mpy_exponent,
2391 * and needs to be rounded and squeezed back into
2392 * double precision format from double extended.
2394 result_exponent
= mpy_exponent
;
2395 Sglext_copy(tmpresp1
,tmpresp2
,resultp1
,resultp2
);
2396 sign_save
= Sgl_signextendedsign(resultp1
);/*save sign*/
2401 * Neither are zeroes.
2402 * Adjust exponent and normalize add operand.
2404 sign_save
= Sgl_signextendedsign(opnd3
); /* save sign */
2405 Sgl_clear_signexponent(opnd3
);
2406 Sgl_leftshiftby1(opnd3
);
2407 Sgl_normalize(opnd3
,add_exponent
);
2408 Sgl_set_sign(opnd3
,sign_save
); /* restore sign */
2410 Sgl_clear_exponent_set_hidden(opnd3
);
2413 * Copy opnd3 to the double extended variable called right.
2415 Sgl_copyto_sglext(opnd3
,rightp1
,rightp2
);
2418 * A zero "save" helps discover equal operands (for later),
2419 * and is used in swapping operands (if needed).
2421 Sglext_xortointp1(tmpresp1
,rightp1
,/*to*/save
);
2424 * Compare magnitude of operands.
2426 Sglext_copytoint_exponentmantissa(tmpresp1
,signlessleft1
);
2427 Sglext_copytoint_exponentmantissa(rightp1
,signlessright1
);
2428 if (mpy_exponent
< add_exponent
|| mpy_exponent
== add_exponent
&&
2429 Sglext_ismagnitudeless(signlessleft1
,signlessright1
)) {
2431 * Set the left operand to the larger one by XOR swap.
2432 * First finish the first word "save".
2434 Sglext_xorfromintp1(save
,rightp1
,/*to*/rightp1
);
2435 Sglext_xorfromintp1(save
,tmpresp1
,/*to*/tmpresp1
);
2436 Sglext_swap_lower(tmpresp2
,rightp2
);
2437 /* also setup exponents used in rest of routine */
2438 diff_exponent
= add_exponent
- mpy_exponent
;
2439 result_exponent
= add_exponent
;
2441 /* also setup exponents used in rest of routine */
2442 diff_exponent
= mpy_exponent
- add_exponent
;
2443 result_exponent
= mpy_exponent
;
2445 /* Invariant: left is not smaller than right. */
2448 * Special case alignment of operands that would force alignment
2449 * beyond the extent of the extension. A further optimization
2450 * could special case this but only reduces the path length for
2451 * this infrequent case.
2453 if (diff_exponent
> SGLEXT_THRESHOLD
) {
2454 diff_exponent
= SGLEXT_THRESHOLD
;
2457 /* Align right operand by shifting it to the right */
2458 Sglext_clear_sign(rightp1
);
2459 Sglext_right_align(rightp1
,rightp2
,/*shifted by*/diff_exponent
);
2461 /* Treat sum and difference of the operands separately. */
2462 if ((int)save
< 0) {
2464 * Difference of the two operands. Overflow can occur if the
2465 * multiply overflowed. A borrow can occur out of the hidden
2466 * bit and force a post normalization phase.
2468 Sglext_subtract(tmpresp1
,tmpresp2
, rightp1
,rightp2
,
2470 sign_save
= Sgl_signextendedsign(resultp1
);
2471 if (Sgl_iszero_hidden(resultp1
)) {
2472 /* Handle normalization */
2473 /* A straightforward algorithm would now shift the
2474 * result and extension left until the hidden bit
2475 * becomes one. Not all of the extension bits need
2476 * participate in the shift. Only the two most
2477 * significant bits (round and guard) are needed.
2478 * If only a single shift is needed then the guard
2479 * bit becomes a significant low order bit and the
2480 * extension must participate in the rounding.
2481 * If more than a single shift is needed, then all
2482 * bits to the right of the guard bit are zeros,
2483 * and the guard bit may or may not be zero. */
2484 Sglext_leftshiftby1(resultp1
,resultp2
);
2486 /* Need to check for a zero result. The sign and
2487 * exponent fields have already been zeroed. The more
2488 * efficient test of the full object can be used.
2490 if (Sglext_iszero(resultp1
,resultp2
)) {
2491 /* Must have been "x-x" or "x+(-x)". */
2492 if (Is_rounding_mode(ROUNDMINUS
))
2493 Sgl_setone_sign(resultp1
);
2494 Sgl_copytoptr(resultp1
,dstptr
);
2495 return(NOEXCEPTION
);
2499 /* Look to see if normalization is finished. */
2500 if (Sgl_isone_hidden(resultp1
)) {
2501 /* No further normalization is needed */
2505 /* Discover first one bit to determine shift amount.
2506 * Use a modified binary search. We have already
2507 * shifted the result one position right and still
2508 * not found a one so the remainder of the extension
2509 * must be zero and simplifies rounding. */
2511 while (Sgl_iszero_hiddenhigh7mantissa(resultp1
)) {
2512 Sglext_leftshiftby8(resultp1
,resultp2
);
2513 result_exponent
-= 8;
2515 /* Now narrow it down to the nibble */
2516 if (Sgl_iszero_hiddenhigh3mantissa(resultp1
)) {
2517 /* The lower nibble contains the
2518 * normalizing one */
2519 Sglext_leftshiftby4(resultp1
,resultp2
);
2520 result_exponent
-= 4;
2522 /* Select case where first bit is set (already
2523 * normalized) otherwise select the proper shift. */
2524 jumpsize
= Sgl_hiddenhigh3mantissa(resultp1
);
2525 if (jumpsize
<= 7) switch(jumpsize
) {
2527 Sglext_leftshiftby3(resultp1
,resultp2
);
2528 result_exponent
-= 3;
2532 Sglext_leftshiftby2(resultp1
,resultp2
);
2533 result_exponent
-= 2;
2539 Sglext_leftshiftby1(resultp1
,resultp2
);
2540 result_exponent
-= 1;
2543 } /* end if (hidden...)... */
2544 /* Fall through and round */
2545 } /* end if (save < 0)... */
2547 /* Add magnitudes */
2548 Sglext_addition(tmpresp1
,tmpresp2
,
2549 rightp1
,rightp2
, /*to*/resultp1
,resultp2
);
2550 sign_save
= Sgl_signextendedsign(resultp1
);
2551 if (Sgl_isone_hiddenoverflow(resultp1
)) {
2552 /* Prenormalization required. */
2553 Sglext_arithrightshiftby1(resultp1
,resultp2
);
2555 } /* end if hiddenoverflow... */
2556 } /* end else ...add magnitudes... */
2558 /* Round the result. If the extension and lower two words are
2559 * all zeros, then the result is exact. Otherwise round in the
2560 * correct direction. Underflow is possible. If a postnormalization
2561 * is necessary, then the mantissa is all zeros so no shift is needed.
2564 if (result_exponent
<= 0 && !Is_underflowtrap_enabled()) {
2565 Sglext_denormalize(resultp1
,resultp2
,result_exponent
,is_tiny
);
2567 Sgl_set_sign(resultp1
,/*using*/sign_save
);
2568 if (Sglext_isnotzero_mantissap2(resultp2
)) {
2570 switch(Rounding_mode()) {
2571 case ROUNDNEAREST
: /* The default. */
2572 if (Sglext_isone_highp2(resultp2
)) {
2573 /* at least 1/2 ulp */
2574 if (Sglext_isnotzero_low31p2(resultp2
) ||
2575 Sglext_isone_lowp1(resultp1
)) {
2576 /* either exactly half way and odd or
2577 * more than 1/2ulp */
2578 Sgl_increment(resultp1
);
2584 if (Sgl_iszero_sign(resultp1
)) {
2585 /* Round up positive results */
2586 Sgl_increment(resultp1
);
2591 if (Sgl_isone_sign(resultp1
)) {
2592 /* Round down negative results */
2593 Sgl_increment(resultp1
);
2597 /* truncate is simple */
2598 } /* end switch... */
2599 if (Sgl_isone_hiddenoverflow(resultp1
)) result_exponent
++;
2601 if (result_exponent
>= SGL_INFINITY_EXPONENT
) {
2603 if (Is_overflowtrap_enabled()) {
2605 * Adjust bias of result
2607 Sgl_setwrapped_exponent(resultp1
,result_exponent
,ovfl
);
2608 Sgl_copytoptr(resultp1
,dstptr
);
2610 if (Is_inexacttrap_enabled())
2611 return (OPC_2E_OVERFLOWEXCEPTION
|
2612 OPC_2E_INEXACTEXCEPTION
);
2613 else Set_inexactflag();
2614 return (OPC_2E_OVERFLOWEXCEPTION
);
2618 Sgl_setoverflow(resultp1
);
2619 } else if (result_exponent
<= 0) { /* underflow case */
2620 if (Is_underflowtrap_enabled()) {
2622 * Adjust bias of result
2624 Sgl_setwrapped_exponent(resultp1
,result_exponent
,unfl
);
2625 Sgl_copytoptr(resultp1
,dstptr
);
2627 if (Is_inexacttrap_enabled())
2628 return (OPC_2E_UNDERFLOWEXCEPTION
|
2629 OPC_2E_INEXACTEXCEPTION
);
2630 else Set_inexactflag();
2631 return(OPC_2E_UNDERFLOWEXCEPTION
);
2633 else if (inexact
&& is_tiny
) Set_underflowflag();
2635 else Sgl_set_exponent(resultp1
,result_exponent
);
2636 Sgl_copytoptr(resultp1
,dstptr
);
2638 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION
);
2639 else Set_inexactflag();
2640 return(NOEXCEPTION
);