ARM: dma-api: fix max_pfn off-by-one error in __dma_supported()
[linux/fpc-iii.git] / arch / parisc / math-emu / fmpyfadd.c
blob029f9bbfe7b5b93145280f8ad6e06669d1c420e3
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
5 * Floating-point emulation code
6 * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
7 */
8 /*
9 * BEGIN_DESC
11 * File:
12 * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
14 * Purpose:
15 * Double Floating-point Multiply Fused Add
16 * Double Floating-point Multiply Negate Fused Add
17 * Single Floating-point Multiply Fused Add
18 * Single Floating-point Multiply Negate Fused Add
20 * External Interfaces:
21 * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
22 * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
23 * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
24 * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
26 * Internal Interfaces:
28 * Theory:
29 * <<please update with a overview of the operation of this file>>
31 * END_DESC
35 #include "float.h"
36 #include "sgl_float.h"
37 #include "dbl_float.h"
41 * Double Floating-point Multiply Fused Add
44 int
45 dbl_fmpyfadd(
46 dbl_floating_point *src1ptr,
47 dbl_floating_point *src2ptr,
48 dbl_floating_point *src3ptr,
49 unsigned int *status,
50 dbl_floating_point *dstptr)
52 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
53 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
54 unsigned int rightp1, rightp2, rightp3, rightp4;
55 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
56 register int mpy_exponent, add_exponent, count;
57 boolean inexact = FALSE, is_tiny = FALSE;
59 unsigned int signlessleft1, signlessright1, save;
60 register int result_exponent, diff_exponent;
61 int sign_save, jumpsize;
63 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
64 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
65 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
67 /*
68 * set sign bit of result of multiply
70 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
71 Dbl_setnegativezerop1(resultp1);
72 else Dbl_setzerop1(resultp1);
75 * Generate multiply exponent
77 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
80 * check first operand for NaN's or infinity
82 if (Dbl_isinfinity_exponent(opnd1p1)) {
83 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
84 if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
85 Dbl_isnotnan(opnd3p1,opnd3p2)) {
86 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
87 /*
88 * invalid since operands are infinity
89 * and zero
91 if (Is_invalidtrap_enabled())
92 return(OPC_2E_INVALIDEXCEPTION);
93 Set_invalidflag();
94 Dbl_makequietnan(resultp1,resultp2);
95 Dbl_copytoptr(resultp1,resultp2,dstptr);
96 return(NOEXCEPTION);
99 * Check third operand for infinity with a
100 * sign opposite of the multiply result
102 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
103 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
105 * invalid since attempting a magnitude
106 * subtraction of infinities
108 if (Is_invalidtrap_enabled())
109 return(OPC_2E_INVALIDEXCEPTION);
110 Set_invalidflag();
111 Dbl_makequietnan(resultp1,resultp2);
112 Dbl_copytoptr(resultp1,resultp2,dstptr);
113 return(NOEXCEPTION);
117 * return infinity
119 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
120 Dbl_copytoptr(resultp1,resultp2,dstptr);
121 return(NOEXCEPTION);
124 else {
126 * is NaN; signaling or quiet?
128 if (Dbl_isone_signaling(opnd1p1)) {
129 /* trap if INVALIDTRAP enabled */
130 if (Is_invalidtrap_enabled())
131 return(OPC_2E_INVALIDEXCEPTION);
132 /* make NaN quiet */
133 Set_invalidflag();
134 Dbl_set_quiet(opnd1p1);
137 * is second operand a signaling NaN?
139 else if (Dbl_is_signalingnan(opnd2p1)) {
140 /* trap if INVALIDTRAP enabled */
141 if (Is_invalidtrap_enabled())
142 return(OPC_2E_INVALIDEXCEPTION);
143 /* make NaN quiet */
144 Set_invalidflag();
145 Dbl_set_quiet(opnd2p1);
146 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
147 return(NOEXCEPTION);
150 * is third operand a signaling NaN?
152 else if (Dbl_is_signalingnan(opnd3p1)) {
153 /* trap if INVALIDTRAP enabled */
154 if (Is_invalidtrap_enabled())
155 return(OPC_2E_INVALIDEXCEPTION);
156 /* make NaN quiet */
157 Set_invalidflag();
158 Dbl_set_quiet(opnd3p1);
159 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
160 return(NOEXCEPTION);
163 * return quiet NaN
165 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
166 return(NOEXCEPTION);
171 * check second operand for NaN's or infinity
173 if (Dbl_isinfinity_exponent(opnd2p1)) {
174 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
175 if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
176 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
178 * invalid since multiply operands are
179 * zero & infinity
181 if (Is_invalidtrap_enabled())
182 return(OPC_2E_INVALIDEXCEPTION);
183 Set_invalidflag();
184 Dbl_makequietnan(opnd2p1,opnd2p2);
185 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
186 return(NOEXCEPTION);
190 * Check third operand for infinity with a
191 * sign opposite of the multiply result
193 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
194 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
196 * invalid since attempting a magnitude
197 * subtraction of infinities
199 if (Is_invalidtrap_enabled())
200 return(OPC_2E_INVALIDEXCEPTION);
201 Set_invalidflag();
202 Dbl_makequietnan(resultp1,resultp2);
203 Dbl_copytoptr(resultp1,resultp2,dstptr);
204 return(NOEXCEPTION);
208 * return infinity
210 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
211 Dbl_copytoptr(resultp1,resultp2,dstptr);
212 return(NOEXCEPTION);
215 else {
217 * is NaN; signaling or quiet?
219 if (Dbl_isone_signaling(opnd2p1)) {
220 /* trap if INVALIDTRAP enabled */
221 if (Is_invalidtrap_enabled())
222 return(OPC_2E_INVALIDEXCEPTION);
223 /* make NaN quiet */
224 Set_invalidflag();
225 Dbl_set_quiet(opnd2p1);
228 * is third operand a signaling NaN?
230 else if (Dbl_is_signalingnan(opnd3p1)) {
231 /* trap if INVALIDTRAP enabled */
232 if (Is_invalidtrap_enabled())
233 return(OPC_2E_INVALIDEXCEPTION);
234 /* make NaN quiet */
235 Set_invalidflag();
236 Dbl_set_quiet(opnd3p1);
237 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
238 return(NOEXCEPTION);
241 * return quiet NaN
243 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
244 return(NOEXCEPTION);
249 * check third operand for NaN's or infinity
251 if (Dbl_isinfinity_exponent(opnd3p1)) {
252 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
253 /* return infinity */
254 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
255 return(NOEXCEPTION);
256 } else {
258 * is NaN; signaling or quiet?
260 if (Dbl_isone_signaling(opnd3p1)) {
261 /* trap if INVALIDTRAP enabled */
262 if (Is_invalidtrap_enabled())
263 return(OPC_2E_INVALIDEXCEPTION);
264 /* make NaN quiet */
265 Set_invalidflag();
266 Dbl_set_quiet(opnd3p1);
269 * return quiet NaN
271 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
272 return(NOEXCEPTION);
277 * Generate multiply mantissa
279 if (Dbl_isnotzero_exponent(opnd1p1)) {
280 /* set hidden bit */
281 Dbl_clear_signexponent_set_hidden(opnd1p1);
283 else {
284 /* check for zero */
285 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
287 * Perform the add opnd3 with zero here.
289 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
290 if (Is_rounding_mode(ROUNDMINUS)) {
291 Dbl_or_signs(opnd3p1,resultp1);
292 } else {
293 Dbl_and_signs(opnd3p1,resultp1);
297 * Now let's check for trapped underflow case.
299 else if (Dbl_iszero_exponent(opnd3p1) &&
300 Is_underflowtrap_enabled()) {
301 /* need to normalize results mantissa */
302 sign_save = Dbl_signextendedsign(opnd3p1);
303 result_exponent = 0;
304 Dbl_leftshiftby1(opnd3p1,opnd3p2);
305 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
306 Dbl_set_sign(opnd3p1,/*using*/sign_save);
307 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
308 unfl);
309 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
310 /* inexact = FALSE */
311 return(OPC_2E_UNDERFLOWEXCEPTION);
313 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
314 return(NOEXCEPTION);
316 /* is denormalized, adjust exponent */
317 Dbl_clear_signexponent(opnd1p1);
318 Dbl_leftshiftby1(opnd1p1,opnd1p2);
319 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
321 /* opnd2 needs to have hidden bit set with msb in hidden bit */
322 if (Dbl_isnotzero_exponent(opnd2p1)) {
323 Dbl_clear_signexponent_set_hidden(opnd2p1);
325 else {
326 /* check for zero */
327 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
329 * Perform the add opnd3 with zero here.
331 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
332 if (Is_rounding_mode(ROUNDMINUS)) {
333 Dbl_or_signs(opnd3p1,resultp1);
334 } else {
335 Dbl_and_signs(opnd3p1,resultp1);
339 * Now let's check for trapped underflow case.
341 else if (Dbl_iszero_exponent(opnd3p1) &&
342 Is_underflowtrap_enabled()) {
343 /* need to normalize results mantissa */
344 sign_save = Dbl_signextendedsign(opnd3p1);
345 result_exponent = 0;
346 Dbl_leftshiftby1(opnd3p1,opnd3p2);
347 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
348 Dbl_set_sign(opnd3p1,/*using*/sign_save);
349 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
350 unfl);
351 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
352 /* inexact = FALSE */
353 return(OPC_2E_UNDERFLOWEXCEPTION);
355 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
356 return(NOEXCEPTION);
358 /* is denormalized; want to normalize */
359 Dbl_clear_signexponent(opnd2p1);
360 Dbl_leftshiftby1(opnd2p1,opnd2p2);
361 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
364 /* Multiply the first two source mantissas together */
367 * The intermediate result will be kept in tmpres,
368 * which needs enough room for 106 bits of mantissa,
369 * so lets call it a Double extended.
371 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
374 * Four bits at a time are inspected in each loop, and a
375 * simple shift and add multiply algorithm is used.
377 for (count = DBL_P-1; count >= 0; count -= 4) {
378 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
379 if (Dbit28p2(opnd1p2)) {
380 /* Fourword_add should be an ADD followed by 3 ADDC's */
381 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
382 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
384 if (Dbit29p2(opnd1p2)) {
385 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
386 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
388 if (Dbit30p2(opnd1p2)) {
389 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
390 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
392 if (Dbit31p2(opnd1p2)) {
393 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
394 opnd2p1, opnd2p2, 0, 0);
396 Dbl_rightshiftby4(opnd1p1,opnd1p2);
398 if (Is_dexthiddenoverflow(tmpresp1)) {
399 /* result mantissa >= 2 (mantissa overflow) */
400 mpy_exponent++;
401 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
405 * Restore the sign of the mpy result which was saved in resultp1.
406 * The exponent will continue to be kept in mpy_exponent.
408 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
411 * No rounding is required, since the result of the multiply
412 * is exact in the extended format.
416 * Now we are ready to perform the add portion of the operation.
418 * The exponents need to be kept as integers for now, since the
419 * multiply result might not fit into the exponent field. We
420 * can't overflow or underflow because of this yet, since the
421 * add could bring the final result back into range.
423 add_exponent = Dbl_exponent(opnd3p1);
426 * Check for denormalized or zero add operand.
428 if (add_exponent == 0) {
429 /* check for zero */
430 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
431 /* right is zero */
432 /* Left can't be zero and must be result.
434 * The final result is now in tmpres and mpy_exponent,
435 * and needs to be rounded and squeezed back into
436 * double precision format from double extended.
438 result_exponent = mpy_exponent;
439 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
440 resultp1,resultp2,resultp3,resultp4);
441 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
442 goto round;
446 * Neither are zeroes.
447 * Adjust exponent and normalize add operand.
449 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
450 Dbl_clear_signexponent(opnd3p1);
451 Dbl_leftshiftby1(opnd3p1,opnd3p2);
452 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
453 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
454 } else {
455 Dbl_clear_exponent_set_hidden(opnd3p1);
458 * Copy opnd3 to the double extended variable called right.
460 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
463 * A zero "save" helps discover equal operands (for later),
464 * and is used in swapping operands (if needed).
466 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
469 * Compare magnitude of operands.
471 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
472 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
473 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
474 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
476 * Set the left operand to the larger one by XOR swap.
477 * First finish the first word "save".
479 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
480 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
481 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
482 rightp2,rightp3,rightp4);
483 /* also setup exponents used in rest of routine */
484 diff_exponent = add_exponent - mpy_exponent;
485 result_exponent = add_exponent;
486 } else {
487 /* also setup exponents used in rest of routine */
488 diff_exponent = mpy_exponent - add_exponent;
489 result_exponent = mpy_exponent;
491 /* Invariant: left is not smaller than right. */
494 * Special case alignment of operands that would force alignment
495 * beyond the extent of the extension. A further optimization
496 * could special case this but only reduces the path length for
497 * this infrequent case.
499 if (diff_exponent > DBLEXT_THRESHOLD) {
500 diff_exponent = DBLEXT_THRESHOLD;
503 /* Align right operand by shifting it to the right */
504 Dblext_clear_sign(rightp1);
505 Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
506 /*shifted by*/diff_exponent);
508 /* Treat sum and difference of the operands separately. */
509 if ((int)save < 0) {
511 * Difference of the two operands. Overflow can occur if the
512 * multiply overflowed. A borrow can occur out of the hidden
513 * bit and force a post normalization phase.
515 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
516 rightp1,rightp2,rightp3,rightp4,
517 resultp1,resultp2,resultp3,resultp4);
518 sign_save = Dbl_signextendedsign(resultp1);
519 if (Dbl_iszero_hidden(resultp1)) {
520 /* Handle normalization */
521 /* A straightforward algorithm would now shift the
522 * result and extension left until the hidden bit
523 * becomes one. Not all of the extension bits need
524 * participate in the shift. Only the two most
525 * significant bits (round and guard) are needed.
526 * If only a single shift is needed then the guard
527 * bit becomes a significant low order bit and the
528 * extension must participate in the rounding.
529 * If more than a single shift is needed, then all
530 * bits to the right of the guard bit are zeros,
531 * and the guard bit may or may not be zero. */
532 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
533 resultp4);
535 /* Need to check for a zero result. The sign and
536 * exponent fields have already been zeroed. The more
537 * efficient test of the full object can be used.
539 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
540 /* Must have been "x-x" or "x+(-x)". */
541 if (Is_rounding_mode(ROUNDMINUS))
542 Dbl_setone_sign(resultp1);
543 Dbl_copytoptr(resultp1,resultp2,dstptr);
544 return(NOEXCEPTION);
546 result_exponent--;
548 /* Look to see if normalization is finished. */
549 if (Dbl_isone_hidden(resultp1)) {
550 /* No further normalization is needed */
551 goto round;
554 /* Discover first one bit to determine shift amount.
555 * Use a modified binary search. We have already
556 * shifted the result one position right and still
557 * not found a one so the remainder of the extension
558 * must be zero and simplifies rounding. */
559 /* Scan bytes */
560 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
561 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
562 result_exponent -= 8;
564 /* Now narrow it down to the nibble */
565 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
566 /* The lower nibble contains the
567 * normalizing one */
568 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
569 result_exponent -= 4;
571 /* Select case where first bit is set (already
572 * normalized) otherwise select the proper shift. */
573 jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
574 if (jumpsize <= 7) switch(jumpsize) {
575 case 1:
576 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
577 resultp4);
578 result_exponent -= 3;
579 break;
580 case 2:
581 case 3:
582 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
583 resultp4);
584 result_exponent -= 2;
585 break;
586 case 4:
587 case 5:
588 case 6:
589 case 7:
590 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
591 resultp4);
592 result_exponent -= 1;
593 break;
595 } /* end if (hidden...)... */
596 /* Fall through and round */
597 } /* end if (save < 0)... */
598 else {
599 /* Add magnitudes */
600 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
601 rightp1,rightp2,rightp3,rightp4,
602 /*to*/resultp1,resultp2,resultp3,resultp4);
603 sign_save = Dbl_signextendedsign(resultp1);
604 if (Dbl_isone_hiddenoverflow(resultp1)) {
605 /* Prenormalization required. */
606 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
607 resultp4);
608 result_exponent++;
609 } /* end if hiddenoverflow... */
610 } /* end else ...add magnitudes... */
612 /* Round the result. If the extension and lower two words are
613 * all zeros, then the result is exact. Otherwise round in the
614 * correct direction. Underflow is possible. If a postnormalization
615 * is necessary, then the mantissa is all zeros so no shift is needed.
617 round:
618 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
619 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
620 result_exponent,is_tiny);
622 Dbl_set_sign(resultp1,/*using*/sign_save);
623 if (Dblext_isnotzero_mantissap3(resultp3) ||
624 Dblext_isnotzero_mantissap4(resultp4)) {
625 inexact = TRUE;
626 switch(Rounding_mode()) {
627 case ROUNDNEAREST: /* The default. */
628 if (Dblext_isone_highp3(resultp3)) {
629 /* at least 1/2 ulp */
630 if (Dblext_isnotzero_low31p3(resultp3) ||
631 Dblext_isnotzero_mantissap4(resultp4) ||
632 Dblext_isone_lowp2(resultp2)) {
633 /* either exactly half way and odd or
634 * more than 1/2ulp */
635 Dbl_increment(resultp1,resultp2);
638 break;
640 case ROUNDPLUS:
641 if (Dbl_iszero_sign(resultp1)) {
642 /* Round up positive results */
643 Dbl_increment(resultp1,resultp2);
645 break;
647 case ROUNDMINUS:
648 if (Dbl_isone_sign(resultp1)) {
649 /* Round down negative results */
650 Dbl_increment(resultp1,resultp2);
653 case ROUNDZERO:;
654 /* truncate is simple */
655 } /* end switch... */
656 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
658 if (result_exponent >= DBL_INFINITY_EXPONENT) {
659 /* trap if OVERFLOWTRAP enabled */
660 if (Is_overflowtrap_enabled()) {
662 * Adjust bias of result
664 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
665 Dbl_copytoptr(resultp1,resultp2,dstptr);
666 if (inexact)
667 if (Is_inexacttrap_enabled())
668 return (OPC_2E_OVERFLOWEXCEPTION |
669 OPC_2E_INEXACTEXCEPTION);
670 else Set_inexactflag();
671 return (OPC_2E_OVERFLOWEXCEPTION);
673 inexact = TRUE;
674 Set_overflowflag();
675 /* set result to infinity or largest number */
676 Dbl_setoverflow(resultp1,resultp2);
678 } else if (result_exponent <= 0) { /* underflow case */
679 if (Is_underflowtrap_enabled()) {
681 * Adjust bias of result
683 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
684 Dbl_copytoptr(resultp1,resultp2,dstptr);
685 if (inexact)
686 if (Is_inexacttrap_enabled())
687 return (OPC_2E_UNDERFLOWEXCEPTION |
688 OPC_2E_INEXACTEXCEPTION);
689 else Set_inexactflag();
690 return(OPC_2E_UNDERFLOWEXCEPTION);
692 else if (inexact && is_tiny) Set_underflowflag();
694 else Dbl_set_exponent(resultp1,result_exponent);
695 Dbl_copytoptr(resultp1,resultp2,dstptr);
696 if (inexact)
697 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
698 else Set_inexactflag();
699 return(NOEXCEPTION);
703 * Double Floating-point Multiply Negate Fused Add
706 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
708 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
709 unsigned int *status;
711 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
712 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
713 unsigned int rightp1, rightp2, rightp3, rightp4;
714 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
715 register int mpy_exponent, add_exponent, count;
716 boolean inexact = FALSE, is_tiny = FALSE;
718 unsigned int signlessleft1, signlessright1, save;
719 register int result_exponent, diff_exponent;
720 int sign_save, jumpsize;
722 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
723 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
724 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
727 * set sign bit of result of multiply
729 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
730 Dbl_setzerop1(resultp1);
731 else
732 Dbl_setnegativezerop1(resultp1);
735 * Generate multiply exponent
737 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
740 * check first operand for NaN's or infinity
742 if (Dbl_isinfinity_exponent(opnd1p1)) {
743 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
744 if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
745 Dbl_isnotnan(opnd3p1,opnd3p2)) {
746 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
748 * invalid since operands are infinity
749 * and zero
751 if (Is_invalidtrap_enabled())
752 return(OPC_2E_INVALIDEXCEPTION);
753 Set_invalidflag();
754 Dbl_makequietnan(resultp1,resultp2);
755 Dbl_copytoptr(resultp1,resultp2,dstptr);
756 return(NOEXCEPTION);
759 * Check third operand for infinity with a
760 * sign opposite of the multiply result
762 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
763 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
765 * invalid since attempting a magnitude
766 * subtraction of infinities
768 if (Is_invalidtrap_enabled())
769 return(OPC_2E_INVALIDEXCEPTION);
770 Set_invalidflag();
771 Dbl_makequietnan(resultp1,resultp2);
772 Dbl_copytoptr(resultp1,resultp2,dstptr);
773 return(NOEXCEPTION);
777 * return infinity
779 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
780 Dbl_copytoptr(resultp1,resultp2,dstptr);
781 return(NOEXCEPTION);
784 else {
786 * is NaN; signaling or quiet?
788 if (Dbl_isone_signaling(opnd1p1)) {
789 /* trap if INVALIDTRAP enabled */
790 if (Is_invalidtrap_enabled())
791 return(OPC_2E_INVALIDEXCEPTION);
792 /* make NaN quiet */
793 Set_invalidflag();
794 Dbl_set_quiet(opnd1p1);
797 * is second operand a signaling NaN?
799 else if (Dbl_is_signalingnan(opnd2p1)) {
800 /* trap if INVALIDTRAP enabled */
801 if (Is_invalidtrap_enabled())
802 return(OPC_2E_INVALIDEXCEPTION);
803 /* make NaN quiet */
804 Set_invalidflag();
805 Dbl_set_quiet(opnd2p1);
806 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
807 return(NOEXCEPTION);
810 * is third operand a signaling NaN?
812 else if (Dbl_is_signalingnan(opnd3p1)) {
813 /* trap if INVALIDTRAP enabled */
814 if (Is_invalidtrap_enabled())
815 return(OPC_2E_INVALIDEXCEPTION);
816 /* make NaN quiet */
817 Set_invalidflag();
818 Dbl_set_quiet(opnd3p1);
819 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
820 return(NOEXCEPTION);
823 * return quiet NaN
825 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
826 return(NOEXCEPTION);
831 * check second operand for NaN's or infinity
833 if (Dbl_isinfinity_exponent(opnd2p1)) {
834 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
835 if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
836 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
838 * invalid since multiply operands are
839 * zero & infinity
841 if (Is_invalidtrap_enabled())
842 return(OPC_2E_INVALIDEXCEPTION);
843 Set_invalidflag();
844 Dbl_makequietnan(opnd2p1,opnd2p2);
845 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
846 return(NOEXCEPTION);
850 * Check third operand for infinity with a
851 * sign opposite of the multiply result
853 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
854 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
856 * invalid since attempting a magnitude
857 * subtraction of infinities
859 if (Is_invalidtrap_enabled())
860 return(OPC_2E_INVALIDEXCEPTION);
861 Set_invalidflag();
862 Dbl_makequietnan(resultp1,resultp2);
863 Dbl_copytoptr(resultp1,resultp2,dstptr);
864 return(NOEXCEPTION);
868 * return infinity
870 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
871 Dbl_copytoptr(resultp1,resultp2,dstptr);
872 return(NOEXCEPTION);
875 else {
877 * is NaN; signaling or quiet?
879 if (Dbl_isone_signaling(opnd2p1)) {
880 /* trap if INVALIDTRAP enabled */
881 if (Is_invalidtrap_enabled())
882 return(OPC_2E_INVALIDEXCEPTION);
883 /* make NaN quiet */
884 Set_invalidflag();
885 Dbl_set_quiet(opnd2p1);
888 * is third operand a signaling NaN?
890 else if (Dbl_is_signalingnan(opnd3p1)) {
891 /* trap if INVALIDTRAP enabled */
892 if (Is_invalidtrap_enabled())
893 return(OPC_2E_INVALIDEXCEPTION);
894 /* make NaN quiet */
895 Set_invalidflag();
896 Dbl_set_quiet(opnd3p1);
897 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
898 return(NOEXCEPTION);
901 * return quiet NaN
903 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
904 return(NOEXCEPTION);
909 * check third operand for NaN's or infinity
911 if (Dbl_isinfinity_exponent(opnd3p1)) {
912 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
913 /* return infinity */
914 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
915 return(NOEXCEPTION);
916 } else {
918 * is NaN; signaling or quiet?
920 if (Dbl_isone_signaling(opnd3p1)) {
921 /* trap if INVALIDTRAP enabled */
922 if (Is_invalidtrap_enabled())
923 return(OPC_2E_INVALIDEXCEPTION);
924 /* make NaN quiet */
925 Set_invalidflag();
926 Dbl_set_quiet(opnd3p1);
929 * return quiet NaN
931 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
932 return(NOEXCEPTION);
937 * Generate multiply mantissa
939 if (Dbl_isnotzero_exponent(opnd1p1)) {
940 /* set hidden bit */
941 Dbl_clear_signexponent_set_hidden(opnd1p1);
943 else {
944 /* check for zero */
945 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
947 * Perform the add opnd3 with zero here.
949 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
950 if (Is_rounding_mode(ROUNDMINUS)) {
951 Dbl_or_signs(opnd3p1,resultp1);
952 } else {
953 Dbl_and_signs(opnd3p1,resultp1);
957 * Now let's check for trapped underflow case.
959 else if (Dbl_iszero_exponent(opnd3p1) &&
960 Is_underflowtrap_enabled()) {
961 /* need to normalize results mantissa */
962 sign_save = Dbl_signextendedsign(opnd3p1);
963 result_exponent = 0;
964 Dbl_leftshiftby1(opnd3p1,opnd3p2);
965 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
966 Dbl_set_sign(opnd3p1,/*using*/sign_save);
967 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
968 unfl);
969 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
970 /* inexact = FALSE */
971 return(OPC_2E_UNDERFLOWEXCEPTION);
973 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
974 return(NOEXCEPTION);
976 /* is denormalized, adjust exponent */
977 Dbl_clear_signexponent(opnd1p1);
978 Dbl_leftshiftby1(opnd1p1,opnd1p2);
979 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
981 /* opnd2 needs to have hidden bit set with msb in hidden bit */
982 if (Dbl_isnotzero_exponent(opnd2p1)) {
983 Dbl_clear_signexponent_set_hidden(opnd2p1);
985 else {
986 /* check for zero */
987 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
989 * Perform the add opnd3 with zero here.
991 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
992 if (Is_rounding_mode(ROUNDMINUS)) {
993 Dbl_or_signs(opnd3p1,resultp1);
994 } else {
995 Dbl_and_signs(opnd3p1,resultp1);
999 * Now let's check for trapped underflow case.
1001 else if (Dbl_iszero_exponent(opnd3p1) &&
1002 Is_underflowtrap_enabled()) {
1003 /* need to normalize results mantissa */
1004 sign_save = Dbl_signextendedsign(opnd3p1);
1005 result_exponent = 0;
1006 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1007 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1008 Dbl_set_sign(opnd3p1,/*using*/sign_save);
1009 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1010 unfl);
1011 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1012 /* inexact = FALSE */
1013 return(OPC_2E_UNDERFLOWEXCEPTION);
1015 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1016 return(NOEXCEPTION);
1018 /* is denormalized; want to normalize */
1019 Dbl_clear_signexponent(opnd2p1);
1020 Dbl_leftshiftby1(opnd2p1,opnd2p2);
1021 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1024 /* Multiply the first two source mantissas together */
1027 * The intermediate result will be kept in tmpres,
1028 * which needs enough room for 106 bits of mantissa,
1029 * so lets call it a Double extended.
1031 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1034 * Four bits at a time are inspected in each loop, and a
1035 * simple shift and add multiply algorithm is used.
1037 for (count = DBL_P-1; count >= 0; count -= 4) {
1038 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1039 if (Dbit28p2(opnd1p2)) {
1040 /* Fourword_add should be an ADD followed by 3 ADDC's */
1041 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1042 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1044 if (Dbit29p2(opnd1p2)) {
1045 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1046 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1048 if (Dbit30p2(opnd1p2)) {
1049 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1050 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1052 if (Dbit31p2(opnd1p2)) {
1053 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1054 opnd2p1, opnd2p2, 0, 0);
1056 Dbl_rightshiftby4(opnd1p1,opnd1p2);
1058 if (Is_dexthiddenoverflow(tmpresp1)) {
1059 /* result mantissa >= 2 (mantissa overflow) */
1060 mpy_exponent++;
1061 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1065 * Restore the sign of the mpy result which was saved in resultp1.
1066 * The exponent will continue to be kept in mpy_exponent.
1068 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1071 * No rounding is required, since the result of the multiply
1072 * is exact in the extended format.
1076 * Now we are ready to perform the add portion of the operation.
1078 * The exponents need to be kept as integers for now, since the
1079 * multiply result might not fit into the exponent field. We
1080 * can't overflow or underflow because of this yet, since the
1081 * add could bring the final result back into range.
1083 add_exponent = Dbl_exponent(opnd3p1);
1086 * Check for denormalized or zero add operand.
1088 if (add_exponent == 0) {
1089 /* check for zero */
1090 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1091 /* right is zero */
1092 /* Left can't be zero and must be result.
1094 * The final result is now in tmpres and mpy_exponent,
1095 * and needs to be rounded and squeezed back into
1096 * double precision format from double extended.
1098 result_exponent = mpy_exponent;
1099 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1100 resultp1,resultp2,resultp3,resultp4);
1101 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1102 goto round;
1106 * Neither are zeroes.
1107 * Adjust exponent and normalize add operand.
1109 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
1110 Dbl_clear_signexponent(opnd3p1);
1111 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1112 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1113 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
1114 } else {
1115 Dbl_clear_exponent_set_hidden(opnd3p1);
1118 * Copy opnd3 to the double extended variable called right.
1120 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1123 * A zero "save" helps discover equal operands (for later),
1124 * and is used in swapping operands (if needed).
1126 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1129 * Compare magnitude of operands.
1131 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1132 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1133 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1134 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1136 * Set the left operand to the larger one by XOR swap.
1137 * First finish the first word "save".
1139 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1140 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1141 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1142 rightp2,rightp3,rightp4);
1143 /* also setup exponents used in rest of routine */
1144 diff_exponent = add_exponent - mpy_exponent;
1145 result_exponent = add_exponent;
1146 } else {
1147 /* also setup exponents used in rest of routine */
1148 diff_exponent = mpy_exponent - add_exponent;
1149 result_exponent = mpy_exponent;
1151 /* Invariant: left is not smaller than right. */
1154 * Special case alignment of operands that would force alignment
1155 * beyond the extent of the extension. A further optimization
1156 * could special case this but only reduces the path length for
1157 * this infrequent case.
1159 if (diff_exponent > DBLEXT_THRESHOLD) {
1160 diff_exponent = DBLEXT_THRESHOLD;
1163 /* Align right operand by shifting it to the right */
1164 Dblext_clear_sign(rightp1);
1165 Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1166 /*shifted by*/diff_exponent);
1168 /* Treat sum and difference of the operands separately. */
1169 if ((int)save < 0) {
1171 * Difference of the two operands. Overflow can occur if the
1172 * multiply overflowed. A borrow can occur out of the hidden
1173 * bit and force a post normalization phase.
1175 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1176 rightp1,rightp2,rightp3,rightp4,
1177 resultp1,resultp2,resultp3,resultp4);
1178 sign_save = Dbl_signextendedsign(resultp1);
1179 if (Dbl_iszero_hidden(resultp1)) {
1180 /* Handle normalization */
1181 /* A straightforward algorithm would now shift the
1182 * result and extension left until the hidden bit
1183 * becomes one. Not all of the extension bits need
1184 * participate in the shift. Only the two most
1185 * significant bits (round and guard) are needed.
1186 * If only a single shift is needed then the guard
1187 * bit becomes a significant low order bit and the
1188 * extension must participate in the rounding.
1189 * If more than a single shift is needed, then all
1190 * bits to the right of the guard bit are zeros,
1191 * and the guard bit may or may not be zero. */
1192 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1193 resultp4);
1195 /* Need to check for a zero result. The sign and
1196 * exponent fields have already been zeroed. The more
1197 * efficient test of the full object can be used.
1199 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1200 /* Must have been "x-x" or "x+(-x)". */
1201 if (Is_rounding_mode(ROUNDMINUS))
1202 Dbl_setone_sign(resultp1);
1203 Dbl_copytoptr(resultp1,resultp2,dstptr);
1204 return(NOEXCEPTION);
1206 result_exponent--;
1208 /* Look to see if normalization is finished. */
1209 if (Dbl_isone_hidden(resultp1)) {
1210 /* No further normalization is needed */
1211 goto round;
1214 /* Discover first one bit to determine shift amount.
1215 * Use a modified binary search. We have already
1216 * shifted the result one position right and still
1217 * not found a one so the remainder of the extension
1218 * must be zero and simplifies rounding. */
1219 /* Scan bytes */
1220 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1221 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1222 result_exponent -= 8;
1224 /* Now narrow it down to the nibble */
1225 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1226 /* The lower nibble contains the
1227 * normalizing one */
1228 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1229 result_exponent -= 4;
1231 /* Select case where first bit is set (already
1232 * normalized) otherwise select the proper shift. */
1233 jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1234 if (jumpsize <= 7) switch(jumpsize) {
1235 case 1:
1236 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1237 resultp4);
1238 result_exponent -= 3;
1239 break;
1240 case 2:
1241 case 3:
1242 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1243 resultp4);
1244 result_exponent -= 2;
1245 break;
1246 case 4:
1247 case 5:
1248 case 6:
1249 case 7:
1250 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1251 resultp4);
1252 result_exponent -= 1;
1253 break;
1255 } /* end if (hidden...)... */
1256 /* Fall through and round */
1257 } /* end if (save < 0)... */
1258 else {
1259 /* Add magnitudes */
1260 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1261 rightp1,rightp2,rightp3,rightp4,
1262 /*to*/resultp1,resultp2,resultp3,resultp4);
1263 sign_save = Dbl_signextendedsign(resultp1);
1264 if (Dbl_isone_hiddenoverflow(resultp1)) {
1265 /* Prenormalization required. */
1266 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1267 resultp4);
1268 result_exponent++;
1269 } /* end if hiddenoverflow... */
1270 } /* end else ...add magnitudes... */
1272 /* Round the result. If the extension and lower two words are
1273 * all zeros, then the result is exact. Otherwise round in the
1274 * correct direction. Underflow is possible. If a postnormalization
1275 * is necessary, then the mantissa is all zeros so no shift is needed.
1277 round:
1278 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1279 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1280 result_exponent,is_tiny);
1282 Dbl_set_sign(resultp1,/*using*/sign_save);
1283 if (Dblext_isnotzero_mantissap3(resultp3) ||
1284 Dblext_isnotzero_mantissap4(resultp4)) {
1285 inexact = TRUE;
1286 switch(Rounding_mode()) {
1287 case ROUNDNEAREST: /* The default. */
1288 if (Dblext_isone_highp3(resultp3)) {
1289 /* at least 1/2 ulp */
1290 if (Dblext_isnotzero_low31p3(resultp3) ||
1291 Dblext_isnotzero_mantissap4(resultp4) ||
1292 Dblext_isone_lowp2(resultp2)) {
1293 /* either exactly half way and odd or
1294 * more than 1/2ulp */
1295 Dbl_increment(resultp1,resultp2);
1298 break;
1300 case ROUNDPLUS:
1301 if (Dbl_iszero_sign(resultp1)) {
1302 /* Round up positive results */
1303 Dbl_increment(resultp1,resultp2);
1305 break;
1307 case ROUNDMINUS:
1308 if (Dbl_isone_sign(resultp1)) {
1309 /* Round down negative results */
1310 Dbl_increment(resultp1,resultp2);
1313 case ROUNDZERO:;
1314 /* truncate is simple */
1315 } /* end switch... */
1316 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1318 if (result_exponent >= DBL_INFINITY_EXPONENT) {
1319 /* Overflow */
1320 if (Is_overflowtrap_enabled()) {
1322 * Adjust bias of result
1324 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1325 Dbl_copytoptr(resultp1,resultp2,dstptr);
1326 if (inexact)
1327 if (Is_inexacttrap_enabled())
1328 return (OPC_2E_OVERFLOWEXCEPTION |
1329 OPC_2E_INEXACTEXCEPTION);
1330 else Set_inexactflag();
1331 return (OPC_2E_OVERFLOWEXCEPTION);
1333 inexact = TRUE;
1334 Set_overflowflag();
1335 Dbl_setoverflow(resultp1,resultp2);
1336 } else if (result_exponent <= 0) { /* underflow case */
1337 if (Is_underflowtrap_enabled()) {
1339 * Adjust bias of result
1341 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1342 Dbl_copytoptr(resultp1,resultp2,dstptr);
1343 if (inexact)
1344 if (Is_inexacttrap_enabled())
1345 return (OPC_2E_UNDERFLOWEXCEPTION |
1346 OPC_2E_INEXACTEXCEPTION);
1347 else Set_inexactflag();
1348 return(OPC_2E_UNDERFLOWEXCEPTION);
1350 else if (inexact && is_tiny) Set_underflowflag();
1352 else Dbl_set_exponent(resultp1,result_exponent);
1353 Dbl_copytoptr(resultp1,resultp2,dstptr);
1354 if (inexact)
1355 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1356 else Set_inexactflag();
1357 return(NOEXCEPTION);
1361 * Single Floating-point Multiply Fused Add
1364 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1366 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1367 unsigned int *status;
1369 unsigned int opnd1, opnd2, opnd3;
1370 register unsigned int tmpresp1, tmpresp2;
1371 unsigned int rightp1, rightp2;
1372 unsigned int resultp1, resultp2 = 0;
1373 register int mpy_exponent, add_exponent, count;
1374 boolean inexact = FALSE, is_tiny = FALSE;
1376 unsigned int signlessleft1, signlessright1, save;
1377 register int result_exponent, diff_exponent;
1378 int sign_save, jumpsize;
1380 Sgl_copyfromptr(src1ptr,opnd1);
1381 Sgl_copyfromptr(src2ptr,opnd2);
1382 Sgl_copyfromptr(src3ptr,opnd3);
1385 * set sign bit of result of multiply
1387 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1388 Sgl_setnegativezero(resultp1);
1389 else Sgl_setzero(resultp1);
1392 * Generate multiply exponent
1394 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1397 * check first operand for NaN's or infinity
1399 if (Sgl_isinfinity_exponent(opnd1)) {
1400 if (Sgl_iszero_mantissa(opnd1)) {
1401 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1402 if (Sgl_iszero_exponentmantissa(opnd2)) {
1404 * invalid since operands are infinity
1405 * and zero
1407 if (Is_invalidtrap_enabled())
1408 return(OPC_2E_INVALIDEXCEPTION);
1409 Set_invalidflag();
1410 Sgl_makequietnan(resultp1);
1411 Sgl_copytoptr(resultp1,dstptr);
1412 return(NOEXCEPTION);
1415 * Check third operand for infinity with a
1416 * sign opposite of the multiply result
1418 if (Sgl_isinfinity(opnd3) &&
1419 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1421 * invalid since attempting a magnitude
1422 * subtraction of infinities
1424 if (Is_invalidtrap_enabled())
1425 return(OPC_2E_INVALIDEXCEPTION);
1426 Set_invalidflag();
1427 Sgl_makequietnan(resultp1);
1428 Sgl_copytoptr(resultp1,dstptr);
1429 return(NOEXCEPTION);
1433 * return infinity
1435 Sgl_setinfinity_exponentmantissa(resultp1);
1436 Sgl_copytoptr(resultp1,dstptr);
1437 return(NOEXCEPTION);
1440 else {
1442 * is NaN; signaling or quiet?
1444 if (Sgl_isone_signaling(opnd1)) {
1445 /* trap if INVALIDTRAP enabled */
1446 if (Is_invalidtrap_enabled())
1447 return(OPC_2E_INVALIDEXCEPTION);
1448 /* make NaN quiet */
1449 Set_invalidflag();
1450 Sgl_set_quiet(opnd1);
1453 * is second operand a signaling NaN?
1455 else if (Sgl_is_signalingnan(opnd2)) {
1456 /* trap if INVALIDTRAP enabled */
1457 if (Is_invalidtrap_enabled())
1458 return(OPC_2E_INVALIDEXCEPTION);
1459 /* make NaN quiet */
1460 Set_invalidflag();
1461 Sgl_set_quiet(opnd2);
1462 Sgl_copytoptr(opnd2,dstptr);
1463 return(NOEXCEPTION);
1466 * is third operand a signaling NaN?
1468 else if (Sgl_is_signalingnan(opnd3)) {
1469 /* trap if INVALIDTRAP enabled */
1470 if (Is_invalidtrap_enabled())
1471 return(OPC_2E_INVALIDEXCEPTION);
1472 /* make NaN quiet */
1473 Set_invalidflag();
1474 Sgl_set_quiet(opnd3);
1475 Sgl_copytoptr(opnd3,dstptr);
1476 return(NOEXCEPTION);
1479 * return quiet NaN
1481 Sgl_copytoptr(opnd1,dstptr);
1482 return(NOEXCEPTION);
1487 * check second operand for NaN's or infinity
1489 if (Sgl_isinfinity_exponent(opnd2)) {
1490 if (Sgl_iszero_mantissa(opnd2)) {
1491 if (Sgl_isnotnan(opnd3)) {
1492 if (Sgl_iszero_exponentmantissa(opnd1)) {
1494 * invalid since multiply operands are
1495 * zero & infinity
1497 if (Is_invalidtrap_enabled())
1498 return(OPC_2E_INVALIDEXCEPTION);
1499 Set_invalidflag();
1500 Sgl_makequietnan(opnd2);
1501 Sgl_copytoptr(opnd2,dstptr);
1502 return(NOEXCEPTION);
1506 * Check third operand for infinity with a
1507 * sign opposite of the multiply result
1509 if (Sgl_isinfinity(opnd3) &&
1510 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1512 * invalid since attempting a magnitude
1513 * subtraction of infinities
1515 if (Is_invalidtrap_enabled())
1516 return(OPC_2E_INVALIDEXCEPTION);
1517 Set_invalidflag();
1518 Sgl_makequietnan(resultp1);
1519 Sgl_copytoptr(resultp1,dstptr);
1520 return(NOEXCEPTION);
1524 * return infinity
1526 Sgl_setinfinity_exponentmantissa(resultp1);
1527 Sgl_copytoptr(resultp1,dstptr);
1528 return(NOEXCEPTION);
1531 else {
1533 * is NaN; signaling or quiet?
1535 if (Sgl_isone_signaling(opnd2)) {
1536 /* trap if INVALIDTRAP enabled */
1537 if (Is_invalidtrap_enabled())
1538 return(OPC_2E_INVALIDEXCEPTION);
1539 /* make NaN quiet */
1540 Set_invalidflag();
1541 Sgl_set_quiet(opnd2);
1544 * is third operand a signaling NaN?
1546 else if (Sgl_is_signalingnan(opnd3)) {
1547 /* trap if INVALIDTRAP enabled */
1548 if (Is_invalidtrap_enabled())
1549 return(OPC_2E_INVALIDEXCEPTION);
1550 /* make NaN quiet */
1551 Set_invalidflag();
1552 Sgl_set_quiet(opnd3);
1553 Sgl_copytoptr(opnd3,dstptr);
1554 return(NOEXCEPTION);
1557 * return quiet NaN
1559 Sgl_copytoptr(opnd2,dstptr);
1560 return(NOEXCEPTION);
1565 * check third operand for NaN's or infinity
1567 if (Sgl_isinfinity_exponent(opnd3)) {
1568 if (Sgl_iszero_mantissa(opnd3)) {
1569 /* return infinity */
1570 Sgl_copytoptr(opnd3,dstptr);
1571 return(NOEXCEPTION);
1572 } else {
1574 * is NaN; signaling or quiet?
1576 if (Sgl_isone_signaling(opnd3)) {
1577 /* trap if INVALIDTRAP enabled */
1578 if (Is_invalidtrap_enabled())
1579 return(OPC_2E_INVALIDEXCEPTION);
1580 /* make NaN quiet */
1581 Set_invalidflag();
1582 Sgl_set_quiet(opnd3);
1585 * return quiet NaN
1587 Sgl_copytoptr(opnd3,dstptr);
1588 return(NOEXCEPTION);
1593 * Generate multiply mantissa
1595 if (Sgl_isnotzero_exponent(opnd1)) {
1596 /* set hidden bit */
1597 Sgl_clear_signexponent_set_hidden(opnd1);
1599 else {
1600 /* check for zero */
1601 if (Sgl_iszero_mantissa(opnd1)) {
1603 * Perform the add opnd3 with zero here.
1605 if (Sgl_iszero_exponentmantissa(opnd3)) {
1606 if (Is_rounding_mode(ROUNDMINUS)) {
1607 Sgl_or_signs(opnd3,resultp1);
1608 } else {
1609 Sgl_and_signs(opnd3,resultp1);
1613 * Now let's check for trapped underflow case.
1615 else if (Sgl_iszero_exponent(opnd3) &&
1616 Is_underflowtrap_enabled()) {
1617 /* need to normalize results mantissa */
1618 sign_save = Sgl_signextendedsign(opnd3);
1619 result_exponent = 0;
1620 Sgl_leftshiftby1(opnd3);
1621 Sgl_normalize(opnd3,result_exponent);
1622 Sgl_set_sign(opnd3,/*using*/sign_save);
1623 Sgl_setwrapped_exponent(opnd3,result_exponent,
1624 unfl);
1625 Sgl_copytoptr(opnd3,dstptr);
1626 /* inexact = FALSE */
1627 return(OPC_2E_UNDERFLOWEXCEPTION);
1629 Sgl_copytoptr(opnd3,dstptr);
1630 return(NOEXCEPTION);
1632 /* is denormalized, adjust exponent */
1633 Sgl_clear_signexponent(opnd1);
1634 Sgl_leftshiftby1(opnd1);
1635 Sgl_normalize(opnd1,mpy_exponent);
1637 /* opnd2 needs to have hidden bit set with msb in hidden bit */
1638 if (Sgl_isnotzero_exponent(opnd2)) {
1639 Sgl_clear_signexponent_set_hidden(opnd2);
1641 else {
1642 /* check for zero */
1643 if (Sgl_iszero_mantissa(opnd2)) {
1645 * Perform the add opnd3 with zero here.
1647 if (Sgl_iszero_exponentmantissa(opnd3)) {
1648 if (Is_rounding_mode(ROUNDMINUS)) {
1649 Sgl_or_signs(opnd3,resultp1);
1650 } else {
1651 Sgl_and_signs(opnd3,resultp1);
1655 * Now let's check for trapped underflow case.
1657 else if (Sgl_iszero_exponent(opnd3) &&
1658 Is_underflowtrap_enabled()) {
1659 /* need to normalize results mantissa */
1660 sign_save = Sgl_signextendedsign(opnd3);
1661 result_exponent = 0;
1662 Sgl_leftshiftby1(opnd3);
1663 Sgl_normalize(opnd3,result_exponent);
1664 Sgl_set_sign(opnd3,/*using*/sign_save);
1665 Sgl_setwrapped_exponent(opnd3,result_exponent,
1666 unfl);
1667 Sgl_copytoptr(opnd3,dstptr);
1668 /* inexact = FALSE */
1669 return(OPC_2E_UNDERFLOWEXCEPTION);
1671 Sgl_copytoptr(opnd3,dstptr);
1672 return(NOEXCEPTION);
1674 /* is denormalized; want to normalize */
1675 Sgl_clear_signexponent(opnd2);
1676 Sgl_leftshiftby1(opnd2);
1677 Sgl_normalize(opnd2,mpy_exponent);
1680 /* Multiply the first two source mantissas together */
1683 * The intermediate result will be kept in tmpres,
1684 * which needs enough room for 106 bits of mantissa,
1685 * so lets call it a Double extended.
1687 Sglext_setzero(tmpresp1,tmpresp2);
1690 * Four bits at a time are inspected in each loop, and a
1691 * simple shift and add multiply algorithm is used.
1693 for (count = SGL_P-1; count >= 0; count -= 4) {
1694 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1695 if (Sbit28(opnd1)) {
1696 /* Twoword_add should be an ADD followed by 2 ADDC's */
1697 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1699 if (Sbit29(opnd1)) {
1700 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1702 if (Sbit30(opnd1)) {
1703 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1705 if (Sbit31(opnd1)) {
1706 Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1708 Sgl_rightshiftby4(opnd1);
1710 if (Is_sexthiddenoverflow(tmpresp1)) {
1711 /* result mantissa >= 2 (mantissa overflow) */
1712 mpy_exponent++;
1713 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1714 } else {
1715 Sglext_rightshiftby3(tmpresp1,tmpresp2);
1719 * Restore the sign of the mpy result which was saved in resultp1.
1720 * The exponent will continue to be kept in mpy_exponent.
1722 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1725 * No rounding is required, since the result of the multiply
1726 * is exact in the extended format.
1730 * Now we are ready to perform the add portion of the operation.
1732 * The exponents need to be kept as integers for now, since the
1733 * multiply result might not fit into the exponent field. We
1734 * can't overflow or underflow because of this yet, since the
1735 * add could bring the final result back into range.
1737 add_exponent = Sgl_exponent(opnd3);
1740 * Check for denormalized or zero add operand.
1742 if (add_exponent == 0) {
1743 /* check for zero */
1744 if (Sgl_iszero_mantissa(opnd3)) {
1745 /* right is zero */
1746 /* Left can't be zero and must be result.
1748 * The final result is now in tmpres and mpy_exponent,
1749 * and needs to be rounded and squeezed back into
1750 * double precision format from double extended.
1752 result_exponent = mpy_exponent;
1753 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1754 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1755 goto round;
1759 * Neither are zeroes.
1760 * Adjust exponent and normalize add operand.
1762 sign_save = Sgl_signextendedsign(opnd3); /* save sign */
1763 Sgl_clear_signexponent(opnd3);
1764 Sgl_leftshiftby1(opnd3);
1765 Sgl_normalize(opnd3,add_exponent);
1766 Sgl_set_sign(opnd3,sign_save); /* restore sign */
1767 } else {
1768 Sgl_clear_exponent_set_hidden(opnd3);
1771 * Copy opnd3 to the double extended variable called right.
1773 Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1776 * A zero "save" helps discover equal operands (for later),
1777 * and is used in swapping operands (if needed).
1779 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1782 * Compare magnitude of operands.
1784 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1785 Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1786 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1787 Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1789 * Set the left operand to the larger one by XOR swap.
1790 * First finish the first word "save".
1792 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1793 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1794 Sglext_swap_lower(tmpresp2,rightp2);
1795 /* also setup exponents used in rest of routine */
1796 diff_exponent = add_exponent - mpy_exponent;
1797 result_exponent = add_exponent;
1798 } else {
1799 /* also setup exponents used in rest of routine */
1800 diff_exponent = mpy_exponent - add_exponent;
1801 result_exponent = mpy_exponent;
1803 /* Invariant: left is not smaller than right. */
1806 * Special case alignment of operands that would force alignment
1807 * beyond the extent of the extension. A further optimization
1808 * could special case this but only reduces the path length for
1809 * this infrequent case.
1811 if (diff_exponent > SGLEXT_THRESHOLD) {
1812 diff_exponent = SGLEXT_THRESHOLD;
1815 /* Align right operand by shifting it to the right */
1816 Sglext_clear_sign(rightp1);
1817 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1819 /* Treat sum and difference of the operands separately. */
1820 if ((int)save < 0) {
1822 * Difference of the two operands. Overflow can occur if the
1823 * multiply overflowed. A borrow can occur out of the hidden
1824 * bit and force a post normalization phase.
1826 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1827 resultp1,resultp2);
1828 sign_save = Sgl_signextendedsign(resultp1);
1829 if (Sgl_iszero_hidden(resultp1)) {
1830 /* Handle normalization */
1831 /* A straightforward algorithm would now shift the
1832 * result and extension left until the hidden bit
1833 * becomes one. Not all of the extension bits need
1834 * participate in the shift. Only the two most
1835 * significant bits (round and guard) are needed.
1836 * If only a single shift is needed then the guard
1837 * bit becomes a significant low order bit and the
1838 * extension must participate in the rounding.
1839 * If more than a single shift is needed, then all
1840 * bits to the right of the guard bit are zeros,
1841 * and the guard bit may or may not be zero. */
1842 Sglext_leftshiftby1(resultp1,resultp2);
1844 /* Need to check for a zero result. The sign and
1845 * exponent fields have already been zeroed. The more
1846 * efficient test of the full object can be used.
1848 if (Sglext_iszero(resultp1,resultp2)) {
1849 /* Must have been "x-x" or "x+(-x)". */
1850 if (Is_rounding_mode(ROUNDMINUS))
1851 Sgl_setone_sign(resultp1);
1852 Sgl_copytoptr(resultp1,dstptr);
1853 return(NOEXCEPTION);
1855 result_exponent--;
1857 /* Look to see if normalization is finished. */
1858 if (Sgl_isone_hidden(resultp1)) {
1859 /* No further normalization is needed */
1860 goto round;
1863 /* Discover first one bit to determine shift amount.
1864 * Use a modified binary search. We have already
1865 * shifted the result one position right and still
1866 * not found a one so the remainder of the extension
1867 * must be zero and simplifies rounding. */
1868 /* Scan bytes */
1869 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1870 Sglext_leftshiftby8(resultp1,resultp2);
1871 result_exponent -= 8;
1873 /* Now narrow it down to the nibble */
1874 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1875 /* The lower nibble contains the
1876 * normalizing one */
1877 Sglext_leftshiftby4(resultp1,resultp2);
1878 result_exponent -= 4;
1880 /* Select case where first bit is set (already
1881 * normalized) otherwise select the proper shift. */
1882 jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1883 if (jumpsize <= 7) switch(jumpsize) {
1884 case 1:
1885 Sglext_leftshiftby3(resultp1,resultp2);
1886 result_exponent -= 3;
1887 break;
1888 case 2:
1889 case 3:
1890 Sglext_leftshiftby2(resultp1,resultp2);
1891 result_exponent -= 2;
1892 break;
1893 case 4:
1894 case 5:
1895 case 6:
1896 case 7:
1897 Sglext_leftshiftby1(resultp1,resultp2);
1898 result_exponent -= 1;
1899 break;
1901 } /* end if (hidden...)... */
1902 /* Fall through and round */
1903 } /* end if (save < 0)... */
1904 else {
1905 /* Add magnitudes */
1906 Sglext_addition(tmpresp1,tmpresp2,
1907 rightp1,rightp2, /*to*/resultp1,resultp2);
1908 sign_save = Sgl_signextendedsign(resultp1);
1909 if (Sgl_isone_hiddenoverflow(resultp1)) {
1910 /* Prenormalization required. */
1911 Sglext_arithrightshiftby1(resultp1,resultp2);
1912 result_exponent++;
1913 } /* end if hiddenoverflow... */
1914 } /* end else ...add magnitudes... */
1916 /* Round the result. If the extension and lower two words are
1917 * all zeros, then the result is exact. Otherwise round in the
1918 * correct direction. Underflow is possible. If a postnormalization
1919 * is necessary, then the mantissa is all zeros so no shift is needed.
1921 round:
1922 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1923 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1925 Sgl_set_sign(resultp1,/*using*/sign_save);
1926 if (Sglext_isnotzero_mantissap2(resultp2)) {
1927 inexact = TRUE;
1928 switch(Rounding_mode()) {
1929 case ROUNDNEAREST: /* The default. */
1930 if (Sglext_isone_highp2(resultp2)) {
1931 /* at least 1/2 ulp */
1932 if (Sglext_isnotzero_low31p2(resultp2) ||
1933 Sglext_isone_lowp1(resultp1)) {
1934 /* either exactly half way and odd or
1935 * more than 1/2ulp */
1936 Sgl_increment(resultp1);
1939 break;
1941 case ROUNDPLUS:
1942 if (Sgl_iszero_sign(resultp1)) {
1943 /* Round up positive results */
1944 Sgl_increment(resultp1);
1946 break;
1948 case ROUNDMINUS:
1949 if (Sgl_isone_sign(resultp1)) {
1950 /* Round down negative results */
1951 Sgl_increment(resultp1);
1954 case ROUNDZERO:;
1955 /* truncate is simple */
1956 } /* end switch... */
1957 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1959 if (result_exponent >= SGL_INFINITY_EXPONENT) {
1960 /* Overflow */
1961 if (Is_overflowtrap_enabled()) {
1963 * Adjust bias of result
1965 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1966 Sgl_copytoptr(resultp1,dstptr);
1967 if (inexact)
1968 if (Is_inexacttrap_enabled())
1969 return (OPC_2E_OVERFLOWEXCEPTION |
1970 OPC_2E_INEXACTEXCEPTION);
1971 else Set_inexactflag();
1972 return (OPC_2E_OVERFLOWEXCEPTION);
1974 inexact = TRUE;
1975 Set_overflowflag();
1976 Sgl_setoverflow(resultp1);
1977 } else if (result_exponent <= 0) { /* underflow case */
1978 if (Is_underflowtrap_enabled()) {
1980 * Adjust bias of result
1982 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1983 Sgl_copytoptr(resultp1,dstptr);
1984 if (inexact)
1985 if (Is_inexacttrap_enabled())
1986 return (OPC_2E_UNDERFLOWEXCEPTION |
1987 OPC_2E_INEXACTEXCEPTION);
1988 else Set_inexactflag();
1989 return(OPC_2E_UNDERFLOWEXCEPTION);
1991 else if (inexact && is_tiny) Set_underflowflag();
1993 else Sgl_set_exponent(resultp1,result_exponent);
1994 Sgl_copytoptr(resultp1,dstptr);
1995 if (inexact)
1996 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1997 else Set_inexactflag();
1998 return(NOEXCEPTION);
2002 * Single Floating-point Multiply Negate Fused Add
2005 sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2007 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2008 unsigned int *status;
2010 unsigned int opnd1, opnd2, opnd3;
2011 register unsigned int tmpresp1, tmpresp2;
2012 unsigned int rightp1, rightp2;
2013 unsigned int resultp1, resultp2 = 0;
2014 register int mpy_exponent, add_exponent, count;
2015 boolean inexact = FALSE, is_tiny = FALSE;
2017 unsigned int signlessleft1, signlessright1, save;
2018 register int result_exponent, diff_exponent;
2019 int sign_save, jumpsize;
2021 Sgl_copyfromptr(src1ptr,opnd1);
2022 Sgl_copyfromptr(src2ptr,opnd2);
2023 Sgl_copyfromptr(src3ptr,opnd3);
2026 * set sign bit of result of multiply
2028 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2029 Sgl_setzero(resultp1);
2030 else
2031 Sgl_setnegativezero(resultp1);
2034 * Generate multiply exponent
2036 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2039 * check first operand for NaN's or infinity
2041 if (Sgl_isinfinity_exponent(opnd1)) {
2042 if (Sgl_iszero_mantissa(opnd1)) {
2043 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2044 if (Sgl_iszero_exponentmantissa(opnd2)) {
2046 * invalid since operands are infinity
2047 * and zero
2049 if (Is_invalidtrap_enabled())
2050 return(OPC_2E_INVALIDEXCEPTION);
2051 Set_invalidflag();
2052 Sgl_makequietnan(resultp1);
2053 Sgl_copytoptr(resultp1,dstptr);
2054 return(NOEXCEPTION);
2057 * Check third operand for infinity with a
2058 * sign opposite of the multiply result
2060 if (Sgl_isinfinity(opnd3) &&
2061 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2063 * invalid since attempting a magnitude
2064 * subtraction of infinities
2066 if (Is_invalidtrap_enabled())
2067 return(OPC_2E_INVALIDEXCEPTION);
2068 Set_invalidflag();
2069 Sgl_makequietnan(resultp1);
2070 Sgl_copytoptr(resultp1,dstptr);
2071 return(NOEXCEPTION);
2075 * return infinity
2077 Sgl_setinfinity_exponentmantissa(resultp1);
2078 Sgl_copytoptr(resultp1,dstptr);
2079 return(NOEXCEPTION);
2082 else {
2084 * is NaN; signaling or quiet?
2086 if (Sgl_isone_signaling(opnd1)) {
2087 /* trap if INVALIDTRAP enabled */
2088 if (Is_invalidtrap_enabled())
2089 return(OPC_2E_INVALIDEXCEPTION);
2090 /* make NaN quiet */
2091 Set_invalidflag();
2092 Sgl_set_quiet(opnd1);
2095 * is second operand a signaling NaN?
2097 else if (Sgl_is_signalingnan(opnd2)) {
2098 /* trap if INVALIDTRAP enabled */
2099 if (Is_invalidtrap_enabled())
2100 return(OPC_2E_INVALIDEXCEPTION);
2101 /* make NaN quiet */
2102 Set_invalidflag();
2103 Sgl_set_quiet(opnd2);
2104 Sgl_copytoptr(opnd2,dstptr);
2105 return(NOEXCEPTION);
2108 * is third operand a signaling NaN?
2110 else if (Sgl_is_signalingnan(opnd3)) {
2111 /* trap if INVALIDTRAP enabled */
2112 if (Is_invalidtrap_enabled())
2113 return(OPC_2E_INVALIDEXCEPTION);
2114 /* make NaN quiet */
2115 Set_invalidflag();
2116 Sgl_set_quiet(opnd3);
2117 Sgl_copytoptr(opnd3,dstptr);
2118 return(NOEXCEPTION);
2121 * return quiet NaN
2123 Sgl_copytoptr(opnd1,dstptr);
2124 return(NOEXCEPTION);
2129 * check second operand for NaN's or infinity
2131 if (Sgl_isinfinity_exponent(opnd2)) {
2132 if (Sgl_iszero_mantissa(opnd2)) {
2133 if (Sgl_isnotnan(opnd3)) {
2134 if (Sgl_iszero_exponentmantissa(opnd1)) {
2136 * invalid since multiply operands are
2137 * zero & infinity
2139 if (Is_invalidtrap_enabled())
2140 return(OPC_2E_INVALIDEXCEPTION);
2141 Set_invalidflag();
2142 Sgl_makequietnan(opnd2);
2143 Sgl_copytoptr(opnd2,dstptr);
2144 return(NOEXCEPTION);
2148 * Check third operand for infinity with a
2149 * sign opposite of the multiply result
2151 if (Sgl_isinfinity(opnd3) &&
2152 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2154 * invalid since attempting a magnitude
2155 * subtraction of infinities
2157 if (Is_invalidtrap_enabled())
2158 return(OPC_2E_INVALIDEXCEPTION);
2159 Set_invalidflag();
2160 Sgl_makequietnan(resultp1);
2161 Sgl_copytoptr(resultp1,dstptr);
2162 return(NOEXCEPTION);
2166 * return infinity
2168 Sgl_setinfinity_exponentmantissa(resultp1);
2169 Sgl_copytoptr(resultp1,dstptr);
2170 return(NOEXCEPTION);
2173 else {
2175 * is NaN; signaling or quiet?
2177 if (Sgl_isone_signaling(opnd2)) {
2178 /* trap if INVALIDTRAP enabled */
2179 if (Is_invalidtrap_enabled())
2180 return(OPC_2E_INVALIDEXCEPTION);
2181 /* make NaN quiet */
2182 Set_invalidflag();
2183 Sgl_set_quiet(opnd2);
2186 * is third operand a signaling NaN?
2188 else if (Sgl_is_signalingnan(opnd3)) {
2189 /* trap if INVALIDTRAP enabled */
2190 if (Is_invalidtrap_enabled())
2191 return(OPC_2E_INVALIDEXCEPTION);
2192 /* make NaN quiet */
2193 Set_invalidflag();
2194 Sgl_set_quiet(opnd3);
2195 Sgl_copytoptr(opnd3,dstptr);
2196 return(NOEXCEPTION);
2199 * return quiet NaN
2201 Sgl_copytoptr(opnd2,dstptr);
2202 return(NOEXCEPTION);
2207 * check third operand for NaN's or infinity
2209 if (Sgl_isinfinity_exponent(opnd3)) {
2210 if (Sgl_iszero_mantissa(opnd3)) {
2211 /* return infinity */
2212 Sgl_copytoptr(opnd3,dstptr);
2213 return(NOEXCEPTION);
2214 } else {
2216 * is NaN; signaling or quiet?
2218 if (Sgl_isone_signaling(opnd3)) {
2219 /* trap if INVALIDTRAP enabled */
2220 if (Is_invalidtrap_enabled())
2221 return(OPC_2E_INVALIDEXCEPTION);
2222 /* make NaN quiet */
2223 Set_invalidflag();
2224 Sgl_set_quiet(opnd3);
2227 * return quiet NaN
2229 Sgl_copytoptr(opnd3,dstptr);
2230 return(NOEXCEPTION);
2235 * Generate multiply mantissa
2237 if (Sgl_isnotzero_exponent(opnd1)) {
2238 /* set hidden bit */
2239 Sgl_clear_signexponent_set_hidden(opnd1);
2241 else {
2242 /* check for zero */
2243 if (Sgl_iszero_mantissa(opnd1)) {
2245 * Perform the add opnd3 with zero here.
2247 if (Sgl_iszero_exponentmantissa(opnd3)) {
2248 if (Is_rounding_mode(ROUNDMINUS)) {
2249 Sgl_or_signs(opnd3,resultp1);
2250 } else {
2251 Sgl_and_signs(opnd3,resultp1);
2255 * Now let's check for trapped underflow case.
2257 else if (Sgl_iszero_exponent(opnd3) &&
2258 Is_underflowtrap_enabled()) {
2259 /* need to normalize results mantissa */
2260 sign_save = Sgl_signextendedsign(opnd3);
2261 result_exponent = 0;
2262 Sgl_leftshiftby1(opnd3);
2263 Sgl_normalize(opnd3,result_exponent);
2264 Sgl_set_sign(opnd3,/*using*/sign_save);
2265 Sgl_setwrapped_exponent(opnd3,result_exponent,
2266 unfl);
2267 Sgl_copytoptr(opnd3,dstptr);
2268 /* inexact = FALSE */
2269 return(OPC_2E_UNDERFLOWEXCEPTION);
2271 Sgl_copytoptr(opnd3,dstptr);
2272 return(NOEXCEPTION);
2274 /* is denormalized, adjust exponent */
2275 Sgl_clear_signexponent(opnd1);
2276 Sgl_leftshiftby1(opnd1);
2277 Sgl_normalize(opnd1,mpy_exponent);
2279 /* opnd2 needs to have hidden bit set with msb in hidden bit */
2280 if (Sgl_isnotzero_exponent(opnd2)) {
2281 Sgl_clear_signexponent_set_hidden(opnd2);
2283 else {
2284 /* check for zero */
2285 if (Sgl_iszero_mantissa(opnd2)) {
2287 * Perform the add opnd3 with zero here.
2289 if (Sgl_iszero_exponentmantissa(opnd3)) {
2290 if (Is_rounding_mode(ROUNDMINUS)) {
2291 Sgl_or_signs(opnd3,resultp1);
2292 } else {
2293 Sgl_and_signs(opnd3,resultp1);
2297 * Now let's check for trapped underflow case.
2299 else if (Sgl_iszero_exponent(opnd3) &&
2300 Is_underflowtrap_enabled()) {
2301 /* need to normalize results mantissa */
2302 sign_save = Sgl_signextendedsign(opnd3);
2303 result_exponent = 0;
2304 Sgl_leftshiftby1(opnd3);
2305 Sgl_normalize(opnd3,result_exponent);
2306 Sgl_set_sign(opnd3,/*using*/sign_save);
2307 Sgl_setwrapped_exponent(opnd3,result_exponent,
2308 unfl);
2309 Sgl_copytoptr(opnd3,dstptr);
2310 /* inexact = FALSE */
2311 return(OPC_2E_UNDERFLOWEXCEPTION);
2313 Sgl_copytoptr(opnd3,dstptr);
2314 return(NOEXCEPTION);
2316 /* is denormalized; want to normalize */
2317 Sgl_clear_signexponent(opnd2);
2318 Sgl_leftshiftby1(opnd2);
2319 Sgl_normalize(opnd2,mpy_exponent);
2322 /* Multiply the first two source mantissas together */
2325 * The intermediate result will be kept in tmpres,
2326 * which needs enough room for 106 bits of mantissa,
2327 * so lets call it a Double extended.
2329 Sglext_setzero(tmpresp1,tmpresp2);
2332 * Four bits at a time are inspected in each loop, and a
2333 * simple shift and add multiply algorithm is used.
2335 for (count = SGL_P-1; count >= 0; count -= 4) {
2336 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2337 if (Sbit28(opnd1)) {
2338 /* Twoword_add should be an ADD followed by 2 ADDC's */
2339 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2341 if (Sbit29(opnd1)) {
2342 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2344 if (Sbit30(opnd1)) {
2345 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2347 if (Sbit31(opnd1)) {
2348 Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2350 Sgl_rightshiftby4(opnd1);
2352 if (Is_sexthiddenoverflow(tmpresp1)) {
2353 /* result mantissa >= 2 (mantissa overflow) */
2354 mpy_exponent++;
2355 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2356 } else {
2357 Sglext_rightshiftby3(tmpresp1,tmpresp2);
2361 * Restore the sign of the mpy result which was saved in resultp1.
2362 * The exponent will continue to be kept in mpy_exponent.
2364 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2367 * No rounding is required, since the result of the multiply
2368 * is exact in the extended format.
2372 * Now we are ready to perform the add portion of the operation.
2374 * The exponents need to be kept as integers for now, since the
2375 * multiply result might not fit into the exponent field. We
2376 * can't overflow or underflow because of this yet, since the
2377 * add could bring the final result back into range.
2379 add_exponent = Sgl_exponent(opnd3);
2382 * Check for denormalized or zero add operand.
2384 if (add_exponent == 0) {
2385 /* check for zero */
2386 if (Sgl_iszero_mantissa(opnd3)) {
2387 /* right is zero */
2388 /* Left can't be zero and must be result.
2390 * The final result is now in tmpres and mpy_exponent,
2391 * and needs to be rounded and squeezed back into
2392 * double precision format from double extended.
2394 result_exponent = mpy_exponent;
2395 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2396 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2397 goto round;
2401 * Neither are zeroes.
2402 * Adjust exponent and normalize add operand.
2404 sign_save = Sgl_signextendedsign(opnd3); /* save sign */
2405 Sgl_clear_signexponent(opnd3);
2406 Sgl_leftshiftby1(opnd3);
2407 Sgl_normalize(opnd3,add_exponent);
2408 Sgl_set_sign(opnd3,sign_save); /* restore sign */
2409 } else {
2410 Sgl_clear_exponent_set_hidden(opnd3);
2413 * Copy opnd3 to the double extended variable called right.
2415 Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2418 * A zero "save" helps discover equal operands (for later),
2419 * and is used in swapping operands (if needed).
2421 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2424 * Compare magnitude of operands.
2426 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2427 Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2428 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2429 Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2431 * Set the left operand to the larger one by XOR swap.
2432 * First finish the first word "save".
2434 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2435 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2436 Sglext_swap_lower(tmpresp2,rightp2);
2437 /* also setup exponents used in rest of routine */
2438 diff_exponent = add_exponent - mpy_exponent;
2439 result_exponent = add_exponent;
2440 } else {
2441 /* also setup exponents used in rest of routine */
2442 diff_exponent = mpy_exponent - add_exponent;
2443 result_exponent = mpy_exponent;
2445 /* Invariant: left is not smaller than right. */
2448 * Special case alignment of operands that would force alignment
2449 * beyond the extent of the extension. A further optimization
2450 * could special case this but only reduces the path length for
2451 * this infrequent case.
2453 if (diff_exponent > SGLEXT_THRESHOLD) {
2454 diff_exponent = SGLEXT_THRESHOLD;
2457 /* Align right operand by shifting it to the right */
2458 Sglext_clear_sign(rightp1);
2459 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2461 /* Treat sum and difference of the operands separately. */
2462 if ((int)save < 0) {
2464 * Difference of the two operands. Overflow can occur if the
2465 * multiply overflowed. A borrow can occur out of the hidden
2466 * bit and force a post normalization phase.
2468 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2469 resultp1,resultp2);
2470 sign_save = Sgl_signextendedsign(resultp1);
2471 if (Sgl_iszero_hidden(resultp1)) {
2472 /* Handle normalization */
2473 /* A straightforward algorithm would now shift the
2474 * result and extension left until the hidden bit
2475 * becomes one. Not all of the extension bits need
2476 * participate in the shift. Only the two most
2477 * significant bits (round and guard) are needed.
2478 * If only a single shift is needed then the guard
2479 * bit becomes a significant low order bit and the
2480 * extension must participate in the rounding.
2481 * If more than a single shift is needed, then all
2482 * bits to the right of the guard bit are zeros,
2483 * and the guard bit may or may not be zero. */
2484 Sglext_leftshiftby1(resultp1,resultp2);
2486 /* Need to check for a zero result. The sign and
2487 * exponent fields have already been zeroed. The more
2488 * efficient test of the full object can be used.
2490 if (Sglext_iszero(resultp1,resultp2)) {
2491 /* Must have been "x-x" or "x+(-x)". */
2492 if (Is_rounding_mode(ROUNDMINUS))
2493 Sgl_setone_sign(resultp1);
2494 Sgl_copytoptr(resultp1,dstptr);
2495 return(NOEXCEPTION);
2497 result_exponent--;
2499 /* Look to see if normalization is finished. */
2500 if (Sgl_isone_hidden(resultp1)) {
2501 /* No further normalization is needed */
2502 goto round;
2505 /* Discover first one bit to determine shift amount.
2506 * Use a modified binary search. We have already
2507 * shifted the result one position right and still
2508 * not found a one so the remainder of the extension
2509 * must be zero and simplifies rounding. */
2510 /* Scan bytes */
2511 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2512 Sglext_leftshiftby8(resultp1,resultp2);
2513 result_exponent -= 8;
2515 /* Now narrow it down to the nibble */
2516 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2517 /* The lower nibble contains the
2518 * normalizing one */
2519 Sglext_leftshiftby4(resultp1,resultp2);
2520 result_exponent -= 4;
2522 /* Select case where first bit is set (already
2523 * normalized) otherwise select the proper shift. */
2524 jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2525 if (jumpsize <= 7) switch(jumpsize) {
2526 case 1:
2527 Sglext_leftshiftby3(resultp1,resultp2);
2528 result_exponent -= 3;
2529 break;
2530 case 2:
2531 case 3:
2532 Sglext_leftshiftby2(resultp1,resultp2);
2533 result_exponent -= 2;
2534 break;
2535 case 4:
2536 case 5:
2537 case 6:
2538 case 7:
2539 Sglext_leftshiftby1(resultp1,resultp2);
2540 result_exponent -= 1;
2541 break;
2543 } /* end if (hidden...)... */
2544 /* Fall through and round */
2545 } /* end if (save < 0)... */
2546 else {
2547 /* Add magnitudes */
2548 Sglext_addition(tmpresp1,tmpresp2,
2549 rightp1,rightp2, /*to*/resultp1,resultp2);
2550 sign_save = Sgl_signextendedsign(resultp1);
2551 if (Sgl_isone_hiddenoverflow(resultp1)) {
2552 /* Prenormalization required. */
2553 Sglext_arithrightshiftby1(resultp1,resultp2);
2554 result_exponent++;
2555 } /* end if hiddenoverflow... */
2556 } /* end else ...add magnitudes... */
2558 /* Round the result. If the extension and lower two words are
2559 * all zeros, then the result is exact. Otherwise round in the
2560 * correct direction. Underflow is possible. If a postnormalization
2561 * is necessary, then the mantissa is all zeros so no shift is needed.
2563 round:
2564 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2565 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2567 Sgl_set_sign(resultp1,/*using*/sign_save);
2568 if (Sglext_isnotzero_mantissap2(resultp2)) {
2569 inexact = TRUE;
2570 switch(Rounding_mode()) {
2571 case ROUNDNEAREST: /* The default. */
2572 if (Sglext_isone_highp2(resultp2)) {
2573 /* at least 1/2 ulp */
2574 if (Sglext_isnotzero_low31p2(resultp2) ||
2575 Sglext_isone_lowp1(resultp1)) {
2576 /* either exactly half way and odd or
2577 * more than 1/2ulp */
2578 Sgl_increment(resultp1);
2581 break;
2583 case ROUNDPLUS:
2584 if (Sgl_iszero_sign(resultp1)) {
2585 /* Round up positive results */
2586 Sgl_increment(resultp1);
2588 break;
2590 case ROUNDMINUS:
2591 if (Sgl_isone_sign(resultp1)) {
2592 /* Round down negative results */
2593 Sgl_increment(resultp1);
2596 case ROUNDZERO:;
2597 /* truncate is simple */
2598 } /* end switch... */
2599 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2601 if (result_exponent >= SGL_INFINITY_EXPONENT) {
2602 /* Overflow */
2603 if (Is_overflowtrap_enabled()) {
2605 * Adjust bias of result
2607 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2608 Sgl_copytoptr(resultp1,dstptr);
2609 if (inexact)
2610 if (Is_inexacttrap_enabled())
2611 return (OPC_2E_OVERFLOWEXCEPTION |
2612 OPC_2E_INEXACTEXCEPTION);
2613 else Set_inexactflag();
2614 return (OPC_2E_OVERFLOWEXCEPTION);
2616 inexact = TRUE;
2617 Set_overflowflag();
2618 Sgl_setoverflow(resultp1);
2619 } else if (result_exponent <= 0) { /* underflow case */
2620 if (Is_underflowtrap_enabled()) {
2622 * Adjust bias of result
2624 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2625 Sgl_copytoptr(resultp1,dstptr);
2626 if (inexact)
2627 if (Is_inexacttrap_enabled())
2628 return (OPC_2E_UNDERFLOWEXCEPTION |
2629 OPC_2E_INEXACTEXCEPTION);
2630 else Set_inexactflag();
2631 return(OPC_2E_UNDERFLOWEXCEPTION);
2633 else if (inexact && is_tiny) Set_underflowflag();
2635 else Sgl_set_exponent(resultp1,result_exponent);
2636 Sgl_copytoptr(resultp1,dstptr);
2637 if (inexact)
2638 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2639 else Set_inexactflag();
2640 return(NOEXCEPTION);