arch/parisc/math-emu/fmpyfadd.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * Linux/PA-RISC Project (http://www.parisc-linux.org/)
   4  *
   5  * Floating-point emulation code
   6  *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
   7  */
   8 /*
   9  * BEGIN_DESC
  10  *
  11  *  File:
  12  *      @(#)    pa/spmath/fmpyfadd.c            $Revision: 1.1 $
  13  *
  14  *  Purpose:
  15  *      Double Floating-point Multiply Fused Add
  16  *      Double Floating-point Multiply Negate Fused Add
  17  *      Single Floating-point Multiply Fused Add
  18  *      Single Floating-point Multiply Negate Fused Add
  19  *
  20  *  External Interfaces:
  21  *      dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  22  *      dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  23  *      sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  24  *      sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  25  *
  26  *  Internal Interfaces:
  27  *
  28  *  Theory:
  29  *      <<please update with an overview of the operation of this file>>
  30  *
  31  * END_DESC
  32 */
  33
  34
  35 #include "float.h"
  36 #include "sgl_float.h"
  37 #include "dbl_float.h"
  38
  39
  40 /*
  41  *  Double Floating-point Multiply Fused Add
  42  */
  43
  44 int
  45 dbl_fmpyfadd(
  46             dbl_floating_point *src1ptr,
  47             dbl_floating_point *src2ptr,
  48             dbl_floating_point *src3ptr,
  49             unsigned int *status,
  50             dbl_floating_point *dstptr)
  51 {
  52         unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
  53         register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
  54         unsigned int rightp1, rightp2, rightp3, rightp4;
  55         unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
  56         register int mpy_exponent, add_exponent, count;
  57         boolean inexact = FALSE, is_tiny = FALSE;
  58
  59         unsigned int signlessleft1, signlessright1, save;
  60         register int result_exponent, diff_exponent;
  61         int sign_save, jumpsize;
  62
  63         Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
  64         Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
  65         Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
  66
  67         /*
  68          * set sign bit of result of multiply
  69          */
  70         if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
  71                 Dbl_setnegativezerop1(resultp1);
  72         else Dbl_setzerop1(resultp1);
  73
  74         /*
  75          * Generate multiply exponent
  76          */
  77         mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
  78
  79         /*
  80          * check first operand for NaN's or infinity
  81          */
  82         if (Dbl_isinfinity_exponent(opnd1p1)) {
  83                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  84                         if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
  85                             Dbl_isnotnan(opnd3p1,opnd3p2)) {
  86                                 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
  87                                         /*
  88                                          * invalid since operands are infinity
  89                                          * and zero
  90                                          */
  91                                         if (Is_invalidtrap_enabled())
  92                                                 return(OPC_2E_INVALIDEXCEPTION);
  93                                         Set_invalidflag();
  94                                         Dbl_makequietnan(resultp1,resultp2);
  95                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
  96                                         return(NOEXCEPTION);
  97                                 }
  98                                 /*
  99                                  * Check third operand for infinity with a
 100                                  *  sign opposite of the multiply result
 101                                  */
 102                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 103                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 104                                         /*
 105                                          * invalid since attempting a magnitude
 106                                          * subtraction of infinities
 107                                          */
 108                                         if (Is_invalidtrap_enabled())
 109                                                 return(OPC_2E_INVALIDEXCEPTION);
 110                                         Set_invalidflag();
 111                                         Dbl_makequietnan(resultp1,resultp2);
 112                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 113                                         return(NOEXCEPTION);
 114                                 }
 115
 116                                 /*
 117                                  * return infinity
 118                                  */
 119                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 120                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 121                                 return(NOEXCEPTION);
 122                         }
 123                 }
 124                 else {
 125                         /*
 126                          * is NaN; signaling or quiet?
 127                          */
 128                         if (Dbl_isone_signaling(opnd1p1)) {
 129                                 /* trap if INVALIDTRAP enabled */
 130                                 if (Is_invalidtrap_enabled())
 131                                         return(OPC_2E_INVALIDEXCEPTION);
 132                                 /* make NaN quiet */
 133                                 Set_invalidflag();
 134                                 Dbl_set_quiet(opnd1p1);
 135                         }
 136                         /*
 137                          * is second operand a signaling NaN?
 138                          */
 139                         else if (Dbl_is_signalingnan(opnd2p1)) {
 140                                 /* trap if INVALIDTRAP enabled */
 141                                 if (Is_invalidtrap_enabled())
 142                                         return(OPC_2E_INVALIDEXCEPTION);
 143                                 /* make NaN quiet */
 144                                 Set_invalidflag();
 145                                 Dbl_set_quiet(opnd2p1);
 146                                 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 147                                 return(NOEXCEPTION);
 148                         }
 149                         /*
 150                          * is third operand a signaling NaN?
 151                          */
 152                         else if (Dbl_is_signalingnan(opnd3p1)) {
 153                                 /* trap if INVALIDTRAP enabled */
 154                                 if (Is_invalidtrap_enabled())
 155                                         return(OPC_2E_INVALIDEXCEPTION);
 156                                 /* make NaN quiet */
 157                                 Set_invalidflag();
 158                                 Dbl_set_quiet(opnd3p1);
 159                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 160                                 return(NOEXCEPTION);
 161                         }
 162                         /*
 163                          * return quiet NaN
 164                          */
 165                         Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
 166                         return(NOEXCEPTION);
 167                 }
 168         }
 169
 170         /*
 171          * check second operand for NaN's or infinity
 172          */
 173         if (Dbl_isinfinity_exponent(opnd2p1)) {
 174                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 175                         if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
 176                                 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
 177                                         /*
 178                                          * invalid since multiply operands are
 179                                          * zero & infinity
 180                                          */
 181                                         if (Is_invalidtrap_enabled())
 182                                                 return(OPC_2E_INVALIDEXCEPTION);
 183                                         Set_invalidflag();
 184                                         Dbl_makequietnan(opnd2p1,opnd2p2);
 185                                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 186                                         return(NOEXCEPTION);
 187                                 }
 188
 189                                 /*
 190                                  * Check third operand for infinity with a
 191                                  *  sign opposite of the multiply result
 192                                  */
 193                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 194                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 195                                         /*
 196                                          * invalid since attempting a magnitude
 197                                          * subtraction of infinities
 198                                          */
 199                                         if (Is_invalidtrap_enabled())
 200                                                 return(OPC_2E_INVALIDEXCEPTION);
 201                                         Set_invalidflag();
 202                                         Dbl_makequietnan(resultp1,resultp2);
 203                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 204                                         return(NOEXCEPTION);
 205                                 }
 206
 207                                 /*
 208                                  * return infinity
 209                                  */
 210                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 211                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 212                                 return(NOEXCEPTION);
 213                         }
 214                 }
 215                 else {
 216                         /*
 217                          * is NaN; signaling or quiet?
 218                          */
 219                         if (Dbl_isone_signaling(opnd2p1)) {
 220                                 /* trap if INVALIDTRAP enabled */
 221                                 if (Is_invalidtrap_enabled())
 222                                         return(OPC_2E_INVALIDEXCEPTION);
 223                                 /* make NaN quiet */
 224                                 Set_invalidflag();
 225                                 Dbl_set_quiet(opnd2p1);
 226                         }
 227                         /*
 228                          * is third operand a signaling NaN?
 229                          */
 230                         else if (Dbl_is_signalingnan(opnd3p1)) {
 231                                 /* trap if INVALIDTRAP enabled */
 232                                 if (Is_invalidtrap_enabled())
 233                                                 return(OPC_2E_INVALIDEXCEPTION);
 234                                 /* make NaN quiet */
 235                                 Set_invalidflag();
 236                                 Dbl_set_quiet(opnd3p1);
 237                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 238                                 return(NOEXCEPTION);
 239                         }
 240                         /*
 241                          * return quiet NaN
 242                          */
 243                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 244                         return(NOEXCEPTION);
 245                 }
 246         }
 247
 248         /*
 249          * check third operand for NaN's or infinity
 250          */
 251         if (Dbl_isinfinity_exponent(opnd3p1)) {
 252                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
 253                         /* return infinity */
 254                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 255                         return(NOEXCEPTION);
 256                 } else {
 257                         /*
 258                          * is NaN; signaling or quiet?
 259                          */
 260                         if (Dbl_isone_signaling(opnd3p1)) {
 261                                 /* trap if INVALIDTRAP enabled */
 262                                 if (Is_invalidtrap_enabled())
 263                                         return(OPC_2E_INVALIDEXCEPTION);
 264                                 /* make NaN quiet */
 265                                 Set_invalidflag();
 266                                 Dbl_set_quiet(opnd3p1);
 267                         }
 268                         /*
 269                          * return quiet NaN
 270                          */
 271                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 272                         return(NOEXCEPTION);
 273                 }
 274         }
 275
 276         /*
 277          * Generate multiply mantissa
 278          */
 279         if (Dbl_isnotzero_exponent(opnd1p1)) {
 280                 /* set hidden bit */
 281                 Dbl_clear_signexponent_set_hidden(opnd1p1);
 282         }
 283         else {
 284                 /* check for zero */
 285                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
 286                         /*
 287                          * Perform the add opnd3 with zero here.
 288                          */
 289                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 290                                 if (Is_rounding_mode(ROUNDMINUS)) {
 291                                         Dbl_or_signs(opnd3p1,resultp1);
 292                                 } else {
 293                                         Dbl_and_signs(opnd3p1,resultp1);
 294                                 }
 295                         }
 296                         /*
 297                          * Now let's check for trapped underflow case.
 298                          */
 299                         else if (Dbl_iszero_exponent(opnd3p1) &&
 300                                  Is_underflowtrap_enabled()) {
 301                                 /* need to normalize results mantissa */
 302                                 sign_save = Dbl_signextendedsign(opnd3p1);
 303                                 result_exponent = 0;
 304                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
 305                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
 306                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
 307                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
 308                                                         unfl);
 309                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 310                                 /* inexact = FALSE */
 311                                 return(OPC_2E_UNDERFLOWEXCEPTION);
 312                         }
 313                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 314                         return(NOEXCEPTION);
 315                 }
 316                 /* is denormalized, adjust exponent */
 317                 Dbl_clear_signexponent(opnd1p1);
 318                 Dbl_leftshiftby1(opnd1p1,opnd1p2);
 319                 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
 320         }
 321         /* opnd2 needs to have hidden bit set with msb in hidden bit */
 322         if (Dbl_isnotzero_exponent(opnd2p1)) {
 323                 Dbl_clear_signexponent_set_hidden(opnd2p1);
 324         }
 325         else {
 326                 /* check for zero */
 327                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 328                         /*
 329                          * Perform the add opnd3 with zero here.
 330                          */
 331                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 332                                 if (Is_rounding_mode(ROUNDMINUS)) {
 333                                         Dbl_or_signs(opnd3p1,resultp1);
 334                                 } else {
 335                                         Dbl_and_signs(opnd3p1,resultp1);
 336                                 }
 337                         }
 338                         /*
 339                          * Now let's check for trapped underflow case.
 340                          */
 341                         else if (Dbl_iszero_exponent(opnd3p1) &&
 342                             Is_underflowtrap_enabled()) {
 343                                 /* need to normalize results mantissa */
 344                                 sign_save = Dbl_signextendedsign(opnd3p1);
 345                                 result_exponent = 0;
 346                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
 347                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
 348                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
 349                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
 350                                                         unfl);
 351                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 352                                 /* inexact = FALSE */
 353                                 return(OPC_2E_UNDERFLOWEXCEPTION);
 354                         }
 355                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 356                         return(NOEXCEPTION);
 357                 }
 358                 /* is denormalized; want to normalize */
 359                 Dbl_clear_signexponent(opnd2p1);
 360                 Dbl_leftshiftby1(opnd2p1,opnd2p2);
 361                 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
 362         }
 363
 364         /* Multiply the first two source mantissas together */
 365
 366         /*
 367          * The intermediate result will be kept in tmpres,
 368          * which needs enough room for 106 bits of mantissa,
 369          * so lets call it a Double extended.
 370          */
 371         Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
 372
 373         /*
 374          * Four bits at a time are inspected in each loop, and a
 375          * simple shift and add multiply algorithm is used.
 376          */
 377         for (count = DBL_P-1; count >= 0; count -= 4) {
 378                 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
 379                 if (Dbit28p2(opnd1p2)) {
 380                         /* Fourword_add should be an ADD followed by 3 ADDC's */
 381                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 382                          opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
 383                 }
 384                 if (Dbit29p2(opnd1p2)) {
 385                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 386                          opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
 387                 }
 388                 if (Dbit30p2(opnd1p2)) {
 389                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 390                          opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
 391                 }
 392                 if (Dbit31p2(opnd1p2)) {
 393                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 394                          opnd2p1, opnd2p2, 0, 0);
 395                 }
 396                 Dbl_rightshiftby4(opnd1p1,opnd1p2);
 397         }
 398         if (Is_dexthiddenoverflow(tmpresp1)) {
 399                 /* result mantissa >= 2 (mantissa overflow) */
 400                 mpy_exponent++;
 401                 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
 402         }
 403
 404         /*
 405          * Restore the sign of the mpy result which was saved in resultp1.
 406          * The exponent will continue to be kept in mpy_exponent.
 407          */
 408         Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
 409
 410         /*
 411          * No rounding is required, since the result of the multiply
 412          * is exact in the extended format.
 413          */
 414
 415         /*
 416          * Now we are ready to perform the add portion of the operation.
 417          *
 418          * The exponents need to be kept as integers for now, since the
 419          * multiply result might not fit into the exponent field.  We
 420          * can't overflow or underflow because of this yet, since the
 421          * add could bring the final result back into range.
 422          */
 423         add_exponent = Dbl_exponent(opnd3p1);
 424
 425         /*
 426          * Check for denormalized or zero add operand.
 427          */
 428         if (add_exponent == 0) {
 429                 /* check for zero */
 430                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
 431                         /* right is zero */
 432                         /* Left can't be zero and must be result.
 433                          *
 434                          * The final result is now in tmpres and mpy_exponent,
 435                          * and needs to be rounded and squeezed back into
 436                          * double precision format from double extended.
 437                          */
 438                         result_exponent = mpy_exponent;
 439                         Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
 440                                 resultp1,resultp2,resultp3,resultp4);
 441                         sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
 442                         goto round;
 443                 }
 444
 445                 /*
 446                  * Neither are zeroes.
 447                  * Adjust exponent and normalize add operand.
 448                  */
 449                 sign_save = Dbl_signextendedsign(opnd3p1);      /* save sign */
 450                 Dbl_clear_signexponent(opnd3p1);
 451                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
 452                 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
 453                 Dbl_set_sign(opnd3p1,sign_save);        /* restore sign */
 454         } else {
 455                 Dbl_clear_exponent_set_hidden(opnd3p1);
 456         }
 457         /*
 458          * Copy opnd3 to the double extended variable called right.
 459          */
 460         Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
 461
 462         /*
 463          * A zero "save" helps discover equal operands (for later),
 464          * and is used in swapping operands (if needed).
 465          */
 466         Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
 467
 468         /*
 469          * Compare magnitude of operands.
 470          */
 471         Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
 472         Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
 473         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
 474             Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
 475                 /*
 476                  * Set the left operand to the larger one by XOR swap.
 477                  * First finish the first word "save".
 478                  */
 479                 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
 480                 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
 481                 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
 482                         rightp2,rightp3,rightp4);
 483                 /* also setup exponents used in rest of routine */
 484                 diff_exponent = add_exponent - mpy_exponent;
 485                 result_exponent = add_exponent;
 486         } else {
 487                 /* also setup exponents used in rest of routine */
 488                 diff_exponent = mpy_exponent - add_exponent;
 489                 result_exponent = mpy_exponent;
 490         }
 491         /* Invariant: left is not smaller than right. */
 492
 493         /*
 494          * Special case alignment of operands that would force alignment
 495          * beyond the extent of the extension.  A further optimization
 496          * could special case this but only reduces the path length for
 497          * this infrequent case.
 498          */
 499         if (diff_exponent > DBLEXT_THRESHOLD) {
 500                 diff_exponent = DBLEXT_THRESHOLD;
 501         }
 502
 503         /* Align right operand by shifting it to the right */
 504         Dblext_clear_sign(rightp1);
 505         Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
 506                 /*shifted by*/diff_exponent);
 507
 508         /* Treat sum and difference of the operands separately. */
 509         if ((int)save < 0) {
 510                 /*
 511                  * Difference of the two operands.  Overflow can occur if the
 512                  * multiply overflowed.  A borrow can occur out of the hidden
 513                  * bit and force a post normalization phase.
 514                  */
 515                 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
 516                         rightp1,rightp2,rightp3,rightp4,
 517                         resultp1,resultp2,resultp3,resultp4);
 518                 sign_save = Dbl_signextendedsign(resultp1);
 519                 if (Dbl_iszero_hidden(resultp1)) {
 520                         /* Handle normalization */
 521                 /* A straightforward algorithm would now shift the
 522                  * result and extension left until the hidden bit
 523                  * becomes one.  Not all of the extension bits need
 524                  * participate in the shift.  Only the two most
 525                  * significant bits (round and guard) are needed.
 526                  * If only a single shift is needed then the guard
 527                  * bit becomes a significant low order bit and the
 528                  * extension must participate in the rounding.
 529                  * If more than a single shift is needed, then all
 530                  * bits to the right of the guard bit are zeros,
 531                  * and the guard bit may or may not be zero. */
 532                         Dblext_leftshiftby1(resultp1,resultp2,resultp3,
 533                                 resultp4);
 534
 535                         /* Need to check for a zero result.  The sign and
 536                          * exponent fields have already been zeroed.  The more
 537                          * efficient test of the full object can be used.
 538                          */
 539                          if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
 540                                 /* Must have been "x-x" or "x+(-x)". */
 541                                 if (Is_rounding_mode(ROUNDMINUS))
 542                                         Dbl_setone_sign(resultp1);
 543                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 544                                 return(NOEXCEPTION);
 545                         }
 546                         result_exponent--;
 547
 548                         /* Look to see if normalization is finished. */
 549                         if (Dbl_isone_hidden(resultp1)) {
 550                                 /* No further normalization is needed */
 551                                 goto round;
 552                         }
 553
 554                         /* Discover first one bit to determine shift amount.
 555                          * Use a modified binary search.  We have already
 556                          * shifted the result one position right and still
 557                          * not found a one so the remainder of the extension
 558                          * must be zero and simplifies rounding. */
 559                         /* Scan bytes */
 560                         while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
 561                                 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
 562                                 result_exponent -= 8;
 563                         }
 564                         /* Now narrow it down to the nibble */
 565                         if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
 566                                 /* The lower nibble contains the
 567                                  * normalizing one */
 568                                 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
 569                                 result_exponent -= 4;
 570                         }
 571                         /* Select case where first bit is set (already
 572                          * normalized) otherwise select the proper shift. */
 573                         jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
 574                         if (jumpsize <= 7) switch(jumpsize) {
 575                         case 1:
 576                                 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
 577                                         resultp4);
 578                                 result_exponent -= 3;
 579                                 break;
 580                         case 2:
 581                         case 3:
 582                                 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
 583                                         resultp4);
 584                                 result_exponent -= 2;
 585                                 break;
 586                         case 4:
 587                         case 5:
 588                         case 6:
 589                         case 7:
 590                                 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
 591                                         resultp4);
 592                                 result_exponent -= 1;
 593                                 break;
 594                         }
 595                 } /* end if (hidden...)... */
 596         /* Fall through and round */
 597         } /* end if (save < 0)... */
 598         else {
 599                 /* Add magnitudes */
 600                 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
 601                         rightp1,rightp2,rightp3,rightp4,
 602                         /*to*/resultp1,resultp2,resultp3,resultp4);
 603                 sign_save = Dbl_signextendedsign(resultp1);
 604                 if (Dbl_isone_hiddenoverflow(resultp1)) {
 605                         /* Prenormalization required. */
 606                         Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
 607                                 resultp4);
 608                         result_exponent++;
 609                 } /* end if hiddenoverflow... */
 610         } /* end else ...add magnitudes... */
 611
 612         /* Round the result.  If the extension and lower two words are
 613          * all zeros, then the result is exact.  Otherwise round in the
 614          * correct direction.  Underflow is possible. If a postnormalization
 615          * is necessary, then the mantissa is all zeros so no shift is needed.
 616          */
 617   round:
 618         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
 619                 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
 620                         result_exponent,is_tiny);
 621         }
 622         Dbl_set_sign(resultp1,/*using*/sign_save);
 623         if (Dblext_isnotzero_mantissap3(resultp3) ||
 624             Dblext_isnotzero_mantissap4(resultp4)) {
 625                 inexact = TRUE;
 626                 switch(Rounding_mode()) {
 627                 case ROUNDNEAREST: /* The default. */
 628                         if (Dblext_isone_highp3(resultp3)) {
 629                                 /* at least 1/2 ulp */
 630                                 if (Dblext_isnotzero_low31p3(resultp3) ||
 631                                     Dblext_isnotzero_mantissap4(resultp4) ||
 632                                     Dblext_isone_lowp2(resultp2)) {
 633                                         /* either exactly half way and odd or
 634                                          * more than 1/2ulp */
 635                                         Dbl_increment(resultp1,resultp2);
 636                                 }
 637                         }
 638                         break;
 639
 640                 case ROUNDPLUS:
 641                         if (Dbl_iszero_sign(resultp1)) {
 642                                 /* Round up positive results */
 643                                 Dbl_increment(resultp1,resultp2);
 644                         }
 645                         break;
 646
 647                 case ROUNDMINUS:
 648                         if (Dbl_isone_sign(resultp1)) {
 649                                 /* Round down negative results */
 650                                 Dbl_increment(resultp1,resultp2);
 651                         }
 652
 653                 case ROUNDZERO:;
 654                         /* truncate is simple */
 655                 } /* end switch... */
 656                 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
 657         }
 658         if (result_exponent >= DBL_INFINITY_EXPONENT) {
 659                 /* trap if OVERFLOWTRAP enabled */
 660                 if (Is_overflowtrap_enabled()) {
 661                         /*
 662                          * Adjust bias of result
 663                          */
 664                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
 665                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 666                         if (inexact)
 667                             if (Is_inexacttrap_enabled())
 668                                 return (OPC_2E_OVERFLOWEXCEPTION |
 669                                         OPC_2E_INEXACTEXCEPTION);
 670                             else Set_inexactflag();
 671                         return (OPC_2E_OVERFLOWEXCEPTION);
 672                 }
 673                 inexact = TRUE;
 674                 Set_overflowflag();
 675                 /* set result to infinity or largest number */
 676                 Dbl_setoverflow(resultp1,resultp2);
 677
 678         } else if (result_exponent <= 0) {      /* underflow case */
 679                 if (Is_underflowtrap_enabled()) {
 680                         /*
 681                          * Adjust bias of result
 682                          */
 683                         Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
 684                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 685                         if (inexact)
 686                             if (Is_inexacttrap_enabled())
 687                                 return (OPC_2E_UNDERFLOWEXCEPTION |
 688                                         OPC_2E_INEXACTEXCEPTION);
 689                             else Set_inexactflag();
 690                         return(OPC_2E_UNDERFLOWEXCEPTION);
 691                 }
 692                 else if (inexact && is_tiny) Set_underflowflag();
 693         }
 694         else Dbl_set_exponent(resultp1,result_exponent);
 695         Dbl_copytoptr(resultp1,resultp2,dstptr);
 696         if (inexact)
 697                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
 698                 else Set_inexactflag();
 699         return(NOEXCEPTION);
 700 }
 701
 702 /*
 703  *  Double Floating-point Multiply Negate Fused Add
 704  */
 705
 706 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
 707
 708 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
 709 unsigned int *status;
 710 {
 711         unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
 712         register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
 713         unsigned int rightp1, rightp2, rightp3, rightp4;
 714         unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
 715         register int mpy_exponent, add_exponent, count;
 716         boolean inexact = FALSE, is_tiny = FALSE;
 717
 718         unsigned int signlessleft1, signlessright1, save;
 719         register int result_exponent, diff_exponent;
 720         int sign_save, jumpsize;
 721
 722         Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
 723         Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
 724         Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
 725
 726         /*
 727          * set sign bit of result of multiply
 728          */
 729         if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
 730                 Dbl_setzerop1(resultp1);
 731         else
 732                 Dbl_setnegativezerop1(resultp1);
 733
 734         /*
 735          * Generate multiply exponent
 736          */
 737         mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
 738
 739         /*
 740          * check first operand for NaN's or infinity
 741          */
 742         if (Dbl_isinfinity_exponent(opnd1p1)) {
 743                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
 744                         if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
 745                             Dbl_isnotnan(opnd3p1,opnd3p2)) {
 746                                 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
 747                                         /*
 748                                          * invalid since operands are infinity
 749                                          * and zero
 750                                          */
 751                                         if (Is_invalidtrap_enabled())
 752                                                 return(OPC_2E_INVALIDEXCEPTION);
 753                                         Set_invalidflag();
 754                                         Dbl_makequietnan(resultp1,resultp2);
 755                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 756                                         return(NOEXCEPTION);
 757                                 }
 758                                 /*
 759                                  * Check third operand for infinity with a
 760                                  *  sign opposite of the multiply result
 761                                  */
 762                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 763                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 764                                         /*
 765                                          * invalid since attempting a magnitude
 766                                          * subtraction of infinities
 767                                          */
 768                                         if (Is_invalidtrap_enabled())
 769                                                 return(OPC_2E_INVALIDEXCEPTION);
 770                                         Set_invalidflag();
 771                                         Dbl_makequietnan(resultp1,resultp2);
 772                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 773                                         return(NOEXCEPTION);
 774                                 }
 775
 776                                 /*
 777                                  * return infinity
 778                                  */
 779                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 780                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 781                                 return(NOEXCEPTION);
 782                         }
 783                 }
 784                 else {
 785                         /*
 786                          * is NaN; signaling or quiet?
 787                          */
 788                         if (Dbl_isone_signaling(opnd1p1)) {
 789                                 /* trap if INVALIDTRAP enabled */
 790                                 if (Is_invalidtrap_enabled())
 791                                         return(OPC_2E_INVALIDEXCEPTION);
 792                                 /* make NaN quiet */
 793                                 Set_invalidflag();
 794                                 Dbl_set_quiet(opnd1p1);
 795                         }
 796                         /*
 797                          * is second operand a signaling NaN?
 798                          */
 799                         else if (Dbl_is_signalingnan(opnd2p1)) {
 800                                 /* trap if INVALIDTRAP enabled */
 801                                 if (Is_invalidtrap_enabled())
 802                                         return(OPC_2E_INVALIDEXCEPTION);
 803                                 /* make NaN quiet */
 804                                 Set_invalidflag();
 805                                 Dbl_set_quiet(opnd2p1);
 806                                 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 807                                 return(NOEXCEPTION);
 808                         }
 809                         /*
 810                          * is third operand a signaling NaN?
 811                          */
 812                         else if (Dbl_is_signalingnan(opnd3p1)) {
 813                                 /* trap if INVALIDTRAP enabled */
 814                                 if (Is_invalidtrap_enabled())
 815                                         return(OPC_2E_INVALIDEXCEPTION);
 816                                 /* make NaN quiet */
 817                                 Set_invalidflag();
 818                                 Dbl_set_quiet(opnd3p1);
 819                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 820                                 return(NOEXCEPTION);
 821                         }
 822                         /*
 823                          * return quiet NaN
 824                          */
 825                         Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
 826                         return(NOEXCEPTION);
 827                 }
 828         }
 829
 830         /*
 831          * check second operand for NaN's or infinity
 832          */
 833         if (Dbl_isinfinity_exponent(opnd2p1)) {
 834                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 835                         if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
 836                                 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
 837                                         /*
 838                                          * invalid since multiply operands are
 839                                          * zero & infinity
 840                                          */
 841                                         if (Is_invalidtrap_enabled())
 842                                                 return(OPC_2E_INVALIDEXCEPTION);
 843                                         Set_invalidflag();
 844                                         Dbl_makequietnan(opnd2p1,opnd2p2);
 845                                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 846                                         return(NOEXCEPTION);
 847                                 }
 848
 849                                 /*
 850                                  * Check third operand for infinity with a
 851                                  *  sign opposite of the multiply result
 852                                  */
 853                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 854                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 855                                         /*
 856                                          * invalid since attempting a magnitude
 857                                          * subtraction of infinities
 858                                          */
 859                                         if (Is_invalidtrap_enabled())
 860                                                 return(OPC_2E_INVALIDEXCEPTION);
 861                                         Set_invalidflag();
 862                                         Dbl_makequietnan(resultp1,resultp2);
 863                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 864                                         return(NOEXCEPTION);
 865                                 }
 866
 867                                 /*
 868                                  * return infinity
 869                                  */
 870                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 871                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 872                                 return(NOEXCEPTION);
 873                         }
 874                 }
 875                 else {
 876                         /*
 877                          * is NaN; signaling or quiet?
 878                          */
 879                         if (Dbl_isone_signaling(opnd2p1)) {
 880                                 /* trap if INVALIDTRAP enabled */
 881                                 if (Is_invalidtrap_enabled())
 882                                         return(OPC_2E_INVALIDEXCEPTION);
 883                                 /* make NaN quiet */
 884                                 Set_invalidflag();
 885                                 Dbl_set_quiet(opnd2p1);
 886                         }
 887                         /*
 888                          * is third operand a signaling NaN?
 889                          */
 890                         else if (Dbl_is_signalingnan(opnd3p1)) {
 891                                 /* trap if INVALIDTRAP enabled */
 892                                 if (Is_invalidtrap_enabled())
 893                                                 return(OPC_2E_INVALIDEXCEPTION);
 894                                 /* make NaN quiet */
 895                                 Set_invalidflag();
 896                                 Dbl_set_quiet(opnd3p1);
 897                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 898                                 return(NOEXCEPTION);
 899                         }
 900                         /*
 901                          * return quiet NaN
 902                          */
 903                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 904                         return(NOEXCEPTION);
 905                 }
 906         }
 907
 908         /*
 909          * check third operand for NaN's or infinity
 910          */
 911         if (Dbl_isinfinity_exponent(opnd3p1)) {
 912                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
 913                         /* return infinity */
 914                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 915                         return(NOEXCEPTION);
 916                 } else {
 917                         /*
 918                          * is NaN; signaling or quiet?
 919                          */
 920                         if (Dbl_isone_signaling(opnd3p1)) {
 921                                 /* trap if INVALIDTRAP enabled */
 922                                 if (Is_invalidtrap_enabled())
 923                                         return(OPC_2E_INVALIDEXCEPTION);
 924                                 /* make NaN quiet */
 925                                 Set_invalidflag();
 926                                 Dbl_set_quiet(opnd3p1);
 927                         }
 928                         /*
 929                          * return quiet NaN
 930                          */
 931                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 932                         return(NOEXCEPTION);
 933                 }
 934         }
 935
 936         /*
 937          * Generate multiply mantissa
 938          */
 939         if (Dbl_isnotzero_exponent(opnd1p1)) {
 940                 /* set hidden bit */
 941                 Dbl_clear_signexponent_set_hidden(opnd1p1);
 942         }
 943         else {
 944                 /* check for zero */
 945                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
 946                         /*
 947                          * Perform the add opnd3 with zero here.
 948                          */
 949                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 950                                 if (Is_rounding_mode(ROUNDMINUS)) {
 951                                         Dbl_or_signs(opnd3p1,resultp1);
 952                                 } else {
 953                                         Dbl_and_signs(opnd3p1,resultp1);
 954                                 }
 955                         }
 956                         /*
 957                          * Now let's check for trapped underflow case.
 958                          */
 959                         else if (Dbl_iszero_exponent(opnd3p1) &&
 960                                  Is_underflowtrap_enabled()) {
 961                                 /* need to normalize results mantissa */
 962                                 sign_save = Dbl_signextendedsign(opnd3p1);
 963                                 result_exponent = 0;
 964                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
 965                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
 966                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
 967                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
 968                                                         unfl);
 969                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 970                                 /* inexact = FALSE */
 971                                 return(OPC_2E_UNDERFLOWEXCEPTION);
 972                         }
 973                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 974                         return(NOEXCEPTION);
 975                 }
 976                 /* is denormalized, adjust exponent */
 977                 Dbl_clear_signexponent(opnd1p1);
 978                 Dbl_leftshiftby1(opnd1p1,opnd1p2);
 979                 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
 980         }
 981         /* opnd2 needs to have hidden bit set with msb in hidden bit */
 982         if (Dbl_isnotzero_exponent(opnd2p1)) {
 983                 Dbl_clear_signexponent_set_hidden(opnd2p1);
 984         }
 985         else {
 986                 /* check for zero */
 987                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 988                         /*
 989                          * Perform the add opnd3 with zero here.
 990                          */
 991                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 992                                 if (Is_rounding_mode(ROUNDMINUS)) {
 993                                         Dbl_or_signs(opnd3p1,resultp1);
 994                                 } else {
 995                                         Dbl_and_signs(opnd3p1,resultp1);
 996                                 }
 997                         }
 998                         /*
 999                          * Now let's check for trapped underflow case.
1000                          */
1001                         else if (Dbl_iszero_exponent(opnd3p1) &&
1002                             Is_underflowtrap_enabled()) {
1003                                 /* need to normalize results mantissa */
1004                                 sign_save = Dbl_signextendedsign(opnd3p1);
1005                                 result_exponent = 0;
1006                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1007                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1008                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
1009                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1010                                                         unfl);
1011                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1012                                 /* inexact = FALSE */
1013                                 return(OPC_2E_UNDERFLOWEXCEPTION);
1014                         }
1015                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1016                         return(NOEXCEPTION);
1017                 }
1018                 /* is denormalized; want to normalize */
1019                 Dbl_clear_signexponent(opnd2p1);
1020                 Dbl_leftshiftby1(opnd2p1,opnd2p2);
1021                 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1022         }
1023
1024         /* Multiply the first two source mantissas together */
1025
1026         /*
1027          * The intermediate result will be kept in tmpres,
1028          * which needs enough room for 106 bits of mantissa,
1029          * so let's call it a Double extended.
1030          */
1031         Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1032
1033         /*
1034          * Four bits at a time are inspected in each loop, and a
1035          * simple shift and add multiply algorithm is used.
1036          */
1037         for (count = DBL_P-1; count >= 0; count -= 4) {
1038                 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1039                 if (Dbit28p2(opnd1p2)) {
1040                         /* Fourword_add should be an ADD followed by 3 ADDC's */
1041                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1042                          opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1043                 }
1044                 if (Dbit29p2(opnd1p2)) {
1045                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1046                          opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1047                 }
1048                 if (Dbit30p2(opnd1p2)) {
1049                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1050                          opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1051                 }
1052                 if (Dbit31p2(opnd1p2)) {
1053                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1054                          opnd2p1, opnd2p2, 0, 0);
1055                 }
1056                 Dbl_rightshiftby4(opnd1p1,opnd1p2);
1057         }
1058         if (Is_dexthiddenoverflow(tmpresp1)) {
1059                 /* result mantissa >= 2 (mantissa overflow) */
1060                 mpy_exponent++;
1061                 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1062         }
1063
1064         /*
1065          * Restore the sign of the mpy result which was saved in resultp1.
1066          * The exponent will continue to be kept in mpy_exponent.
1067          */
1068         Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1069
1070         /*
1071          * No rounding is required, since the result of the multiply
1072          * is exact in the extended format.
1073          */
1074
1075         /*
1076          * Now we are ready to perform the add portion of the operation.
1077          *
1078          * The exponents need to be kept as integers for now, since the
1079          * multiply result might not fit into the exponent field.  We
1080          * can't overflow or underflow because of this yet, since the
1081          * add could bring the final result back into range.
1082          */
1083         add_exponent = Dbl_exponent(opnd3p1);
1084
1085         /*
1086          * Check for denormalized or zero add operand.
1087          */
1088         if (add_exponent == 0) {
1089                 /* check for zero */
1090                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1091                         /* right is zero */
1092                         /* Left can't be zero and must be result.
1093                          *
1094                          * The final result is now in tmpres and mpy_exponent,
1095                          * and needs to be rounded and squeezed back into
1096                          * double precision format from double extended.
1097                          */
1098                         result_exponent = mpy_exponent;
1099                         Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1100                                 resultp1,resultp2,resultp3,resultp4);
1101                         sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1102                         goto round;
1103                 }
1104
1105                 /*
1106                  * Neither are zeroes.
1107                  * Adjust exponent and normalize add operand.
1108                  */
1109                 sign_save = Dbl_signextendedsign(opnd3p1);      /* save sign */
1110                 Dbl_clear_signexponent(opnd3p1);
1111                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1112                 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1113                 Dbl_set_sign(opnd3p1,sign_save);        /* restore sign */
1114         } else {
1115                 Dbl_clear_exponent_set_hidden(opnd3p1);
1116         }
1117         /*
1118          * Copy opnd3 to the double extended variable called right.
1119          */
1120         Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1121
1122         /*
1123          * A zero "save" helps discover equal operands (for later),
1124          * and is used in swapping operands (if needed).
1125          */
1126         Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1127
1128         /*
1129          * Compare magnitude of operands.
1130          */
1131         Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1132         Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1133         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1134             Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1135                 /*
1136                  * Set the left operand to the larger one by XOR swap.
1137                  * First finish the first word "save".
1138                  */
1139                 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1140                 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1141                 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1142                         rightp2,rightp3,rightp4);
1143                 /* also setup exponents used in rest of routine */
1144                 diff_exponent = add_exponent - mpy_exponent;
1145                 result_exponent = add_exponent;
1146         } else {
1147                 /* also setup exponents used in rest of routine */
1148                 diff_exponent = mpy_exponent - add_exponent;
1149                 result_exponent = mpy_exponent;
1150         }
1151         /* Invariant: left is not smaller than right. */
1152
1153         /*
1154          * Special case alignment of operands that would force alignment
1155          * beyond the extent of the extension.  A further optimization
1156          * could special case this but only reduces the path length for
1157          * this infrequent case.
1158          */
1159         if (diff_exponent > DBLEXT_THRESHOLD) {
1160                 diff_exponent = DBLEXT_THRESHOLD;
1161         }
1162
1163         /* Align right operand by shifting it to the right */
1164         Dblext_clear_sign(rightp1);
1165         Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1166                 /*shifted by*/diff_exponent);
1167
1168         /* Treat sum and difference of the operands separately. */
1169         if ((int)save < 0) {
1170                 /*
1171                  * Difference of the two operands.  Overflow can occur if the
1172                  * multiply overflowed.  A borrow can occur out of the hidden
1173                  * bit and force a post normalization phase.
1174                  */
1175                 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1176                         rightp1,rightp2,rightp3,rightp4,
1177                         resultp1,resultp2,resultp3,resultp4);
1178                 sign_save = Dbl_signextendedsign(resultp1);
1179                 if (Dbl_iszero_hidden(resultp1)) {
1180                         /* Handle normalization */
1181                 /* A straightforward algorithm would now shift the
1182                  * result and extension left until the hidden bit
1183                  * becomes one.  Not all of the extension bits need
1184                  * participate in the shift.  Only the two most
1185                  * significant bits (round and guard) are needed.
1186                  * If only a single shift is needed then the guard
1187                  * bit becomes a significant low order bit and the
1188                  * extension must participate in the rounding.
1189                  * If more than a single shift is needed, then all
1190                  * bits to the right of the guard bit are zeros,
1191                  * and the guard bit may or may not be zero. */
1192                         Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1193                                 resultp4);
1194
1195                         /* Need to check for a zero result.  The sign and
1196                          * exponent fields have already been zeroed.  The more
1197                          * efficient test of the full object can be used.
1198                          */
1199                          if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1200                                 /* Must have been "x-x" or "x+(-x)". */
1201                                 if (Is_rounding_mode(ROUNDMINUS))
1202                                         Dbl_setone_sign(resultp1);
1203                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
1204                                 return(NOEXCEPTION);
1205                         }
1206                         result_exponent--;
1207
1208                         /* Look to see if normalization is finished. */
1209                         if (Dbl_isone_hidden(resultp1)) {
1210                                 /* No further normalization is needed */
1211                                 goto round;
1212                         }
1213
1214                         /* Discover first one bit to determine shift amount.
1215                          * Use a modified binary search.  We have already
1216                          * shifted the result one position left and still
1217                          * not found a one so the remainder of the extension
1218                          * must be zero and simplifies rounding. */
1219                         /* Scan bytes */
1220                         while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1221                                 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1222                                 result_exponent -= 8;
1223                         }
1224                         /* Now narrow it down to the nibble */
1225                         if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1226                                 /* The lower nibble contains the
1227                                  * normalizing one */
1228                                 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1229                                 result_exponent -= 4;
1230                         }
1231                         /* Select case where first bit is set (already
1232                          * normalized) otherwise select the proper shift. */
1233                         jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1234                         if (jumpsize <= 7) switch(jumpsize) {
1235                         case 1:
1236                                 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1237                                         resultp4);
1238                                 result_exponent -= 3;
1239                                 break;
1240                         case 2:
1241                         case 3:
1242                                 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1243                                         resultp4);
1244                                 result_exponent -= 2;
1245                                 break;
1246                         case 4:
1247                         case 5:
1248                         case 6:
1249                         case 7:
1250                                 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1251                                         resultp4);
1252                                 result_exponent -= 1;
1253                                 break;
1254                         }
1255                 } /* end if (hidden...)... */
1256         /* Fall through and round */
1257         } /* end if (save < 0)... */
1258         else {
1259                 /* Add magnitudes */
1260                 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1261                         rightp1,rightp2,rightp3,rightp4,
1262                         /*to*/resultp1,resultp2,resultp3,resultp4);
1263                 sign_save = Dbl_signextendedsign(resultp1);
1264                 if (Dbl_isone_hiddenoverflow(resultp1)) {
1265                         /* Prenormalization required. */
1266                         Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1267                                 resultp4);
1268                         result_exponent++;
1269                 } /* end if hiddenoverflow... */
1270         } /* end else ...add magnitudes... */
1271
1272         /* Round the result.  If the extension and lower two words are
1273          * all zeros, then the result is exact.  Otherwise round in the
1274          * correct direction.  Underflow is possible. If a postnormalization
1275          * is necessary, then the mantissa is all zeros so no shift is needed.
1276          */
1277   round:
1278         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1279                 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1280                         result_exponent,is_tiny);
1281         }
1282         Dbl_set_sign(resultp1,/*using*/sign_save);
1283         if (Dblext_isnotzero_mantissap3(resultp3) ||
1284             Dblext_isnotzero_mantissap4(resultp4)) {
1285                 inexact = TRUE;
1286                 switch(Rounding_mode()) {
1287                 case ROUNDNEAREST: /* The default. */
1288                         if (Dblext_isone_highp3(resultp3)) {
1289                                 /* at least 1/2 ulp */
1290                                 if (Dblext_isnotzero_low31p3(resultp3) ||
1291                                     Dblext_isnotzero_mantissap4(resultp4) ||
1292                                     Dblext_isone_lowp2(resultp2)) {
1293                                         /* either exactly half way and odd or
1294                                          * more than 1/2ulp */
1295                                         Dbl_increment(resultp1,resultp2);
1296                                 }
1297                         }
1298                         break;
1299
1300                 case ROUNDPLUS:
1301                         if (Dbl_iszero_sign(resultp1)) {
1302                                 /* Round up positive results */
1303                                 Dbl_increment(resultp1,resultp2);
1304                         }
1305                         break;
1306
1307                 case ROUNDMINUS:
1308                         if (Dbl_isone_sign(resultp1)) {
1309                                 /* Round down negative results */
1310                                 Dbl_increment(resultp1,resultp2);
1311                         }
1312
1313                 case ROUNDZERO:;
1314                         /* truncate is simple */
1315                 } /* end switch... */
1316                 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1317         }
1318         if (result_exponent >= DBL_INFINITY_EXPONENT) {
1319                 /* Overflow */
1320                 if (Is_overflowtrap_enabled()) {
1321                         /*
1322                          * Adjust bias of result
1323                          */
1324                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1325                         Dbl_copytoptr(resultp1,resultp2,dstptr);
1326                         if (inexact)
1327                             if (Is_inexacttrap_enabled())
1328                                 return (OPC_2E_OVERFLOWEXCEPTION |
1329                                         OPC_2E_INEXACTEXCEPTION);
1330                             else Set_inexactflag();
1331                         return (OPC_2E_OVERFLOWEXCEPTION);
1332                 }
1333                 inexact = TRUE;
1334                 Set_overflowflag();
1335                 Dbl_setoverflow(resultp1,resultp2);
1336         } else if (result_exponent <= 0) {      /* underflow case */
1337                 if (Is_underflowtrap_enabled()) {
1338                         /*
1339                          * Adjust bias of result
1340                          */
1341                         Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1342                         Dbl_copytoptr(resultp1,resultp2,dstptr);
1343                         if (inexact)
1344                             if (Is_inexacttrap_enabled())
1345                                 return (OPC_2E_UNDERFLOWEXCEPTION |
1346                                         OPC_2E_INEXACTEXCEPTION);
1347                             else Set_inexactflag();
1348                         return(OPC_2E_UNDERFLOWEXCEPTION);
1349                 }
1350                 else if (inexact && is_tiny) Set_underflowflag();
1351         }
1352         else Dbl_set_exponent(resultp1,result_exponent);
1353         Dbl_copytoptr(resultp1,resultp2,dstptr);
1354         if (inexact)
1355                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1356                 else Set_inexactflag();
1357         return(NOEXCEPTION);
1358 }
1359
1360 /*
1361  *  Single Floating-point Multiply Fused Add
1362  */
1363
1364 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1365
1366 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1367 unsigned int *status;
1368 {
1369         unsigned int opnd1, opnd2, opnd3;
1370         register unsigned int tmpresp1, tmpresp2;
1371         unsigned int rightp1, rightp2;
1372         unsigned int resultp1, resultp2 = 0;
1373         register int mpy_exponent, add_exponent, count;
1374         boolean inexact = FALSE, is_tiny = FALSE;
1375
1376         unsigned int signlessleft1, signlessright1, save;
1377         register int result_exponent, diff_exponent;
1378         int sign_save, jumpsize;
1379
1380         Sgl_copyfromptr(src1ptr,opnd1);
1381         Sgl_copyfromptr(src2ptr,opnd2);
1382         Sgl_copyfromptr(src3ptr,opnd3);
1383
1384         /*
1385          * set sign bit of result of multiply
1386          */
1387         if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1388                 Sgl_setnegativezero(resultp1);
1389         else Sgl_setzero(resultp1);
1390
1391         /*
1392          * Generate multiply exponent
1393          */
1394         mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1395
1396         /*
1397          * check first operand for NaN's or infinity
1398          */
1399         if (Sgl_isinfinity_exponent(opnd1)) {
1400                 if (Sgl_iszero_mantissa(opnd1)) {
1401                         if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1402                                 if (Sgl_iszero_exponentmantissa(opnd2)) {
1403                                         /*
1404                                          * invalid since operands are infinity
1405                                          * and zero
1406                                          */
1407                                         if (Is_invalidtrap_enabled())
1408                                                 return(OPC_2E_INVALIDEXCEPTION);
1409                                         Set_invalidflag();
1410                                         Sgl_makequietnan(resultp1);
1411                                         Sgl_copytoptr(resultp1,dstptr);
1412                                         return(NOEXCEPTION);
1413                                 }
1414                                 /*
1415                                  * Check third operand for infinity with a
1416                                  *  sign opposite of the multiply result
1417                                  */
1418                                 if (Sgl_isinfinity(opnd3) &&
1419                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1420                                         /*
1421                                          * invalid since attempting a magnitude
1422                                          * subtraction of infinities
1423                                          */
1424                                         if (Is_invalidtrap_enabled())
1425                                                 return(OPC_2E_INVALIDEXCEPTION);
1426                                         Set_invalidflag();
1427                                         Sgl_makequietnan(resultp1);
1428                                         Sgl_copytoptr(resultp1,dstptr);
1429                                         return(NOEXCEPTION);
1430                                 }
1431
1432                                 /*
1433                                  * return infinity
1434                                  */
1435                                 Sgl_setinfinity_exponentmantissa(resultp1);
1436                                 Sgl_copytoptr(resultp1,dstptr);
1437                                 return(NOEXCEPTION);
1438                         }
1439                 }
1440                 else {
1441                         /*
1442                          * is NaN; signaling or quiet?
1443                          */
1444                         if (Sgl_isone_signaling(opnd1)) {
1445                                 /* trap if INVALIDTRAP enabled */
1446                                 if (Is_invalidtrap_enabled())
1447                                         return(OPC_2E_INVALIDEXCEPTION);
1448                                 /* make NaN quiet */
1449                                 Set_invalidflag();
1450                                 Sgl_set_quiet(opnd1);
1451                         }
1452                         /*
1453                          * is second operand a signaling NaN?
1454                          */
1455                         else if (Sgl_is_signalingnan(opnd2)) {
1456                                 /* trap if INVALIDTRAP enabled */
1457                                 if (Is_invalidtrap_enabled())
1458                                         return(OPC_2E_INVALIDEXCEPTION);
1459                                 /* make NaN quiet */
1460                                 Set_invalidflag();
1461                                 Sgl_set_quiet(opnd2);
1462                                 Sgl_copytoptr(opnd2,dstptr);
1463                                 return(NOEXCEPTION);
1464                         }
1465                         /*
1466                          * is third operand a signaling NaN?
1467                          */
1468                         else if (Sgl_is_signalingnan(opnd3)) {
1469                                 /* trap if INVALIDTRAP enabled */
1470                                 if (Is_invalidtrap_enabled())
1471                                         return(OPC_2E_INVALIDEXCEPTION);
1472                                 /* make NaN quiet */
1473                                 Set_invalidflag();
1474                                 Sgl_set_quiet(opnd3);
1475                                 Sgl_copytoptr(opnd3,dstptr);
1476                                 return(NOEXCEPTION);
1477                         }
1478                         /*
1479                          * return quiet NaN
1480                          */
1481                         Sgl_copytoptr(opnd1,dstptr);
1482                         return(NOEXCEPTION);
1483                 }
1484         }
1485
1486         /*
1487          * check second operand for NaN's or infinity
1488          */
1489         if (Sgl_isinfinity_exponent(opnd2)) {
1490                 if (Sgl_iszero_mantissa(opnd2)) {
1491                         if (Sgl_isnotnan(opnd3)) {
1492                                 if (Sgl_iszero_exponentmantissa(opnd1)) {
1493                                         /*
1494                                          * invalid since multiply operands are
1495                                          * zero & infinity
1496                                          */
1497                                         if (Is_invalidtrap_enabled())
1498                                                 return(OPC_2E_INVALIDEXCEPTION);
1499                                         Set_invalidflag();
1500                                         Sgl_makequietnan(opnd2);
1501                                         Sgl_copytoptr(opnd2,dstptr);
1502                                         return(NOEXCEPTION);
1503                                 }
1504
1505                                 /*
1506                                  * Check third operand for infinity with a
1507                                  *  sign opposite of the multiply result
1508                                  */
1509                                 if (Sgl_isinfinity(opnd3) &&
1510                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1511                                         /*
1512                                          * invalid since attempting a magnitude
1513                                          * subtraction of infinities
1514                                          */
1515                                         if (Is_invalidtrap_enabled())
1516                                                 return(OPC_2E_INVALIDEXCEPTION);
1517                                         Set_invalidflag();
1518                                         Sgl_makequietnan(resultp1);
1519                                         Sgl_copytoptr(resultp1,dstptr);
1520                                         return(NOEXCEPTION);
1521                                 }
1522
1523                                 /*
1524                                  * return infinity
1525                                  */
1526                                 Sgl_setinfinity_exponentmantissa(resultp1);
1527                                 Sgl_copytoptr(resultp1,dstptr);
1528                                 return(NOEXCEPTION);
1529                         }
1530                 }
1531                 else {
1532                         /*
1533                          * is NaN; signaling or quiet?
1534                          */
1535                         if (Sgl_isone_signaling(opnd2)) {
1536                                 /* trap if INVALIDTRAP enabled */
1537                                 if (Is_invalidtrap_enabled())
1538                                         return(OPC_2E_INVALIDEXCEPTION);
1539                                 /* make NaN quiet */
1540                                 Set_invalidflag();
1541                                 Sgl_set_quiet(opnd2);
1542                         }
1543                         /*
1544                          * is third operand a signaling NaN?
1545                          */
1546                         else if (Sgl_is_signalingnan(opnd3)) {
1547                                 /* trap if INVALIDTRAP enabled */
1548                                 if (Is_invalidtrap_enabled())
1549                                                 return(OPC_2E_INVALIDEXCEPTION);
1550                                 /* make NaN quiet */
1551                                 Set_invalidflag();
1552                                 Sgl_set_quiet(opnd3);
1553                                 Sgl_copytoptr(opnd3,dstptr);
1554                                 return(NOEXCEPTION);
1555                         }
1556                         /*
1557                          * return quiet NaN
1558                          */
1559                         Sgl_copytoptr(opnd2,dstptr);
1560                         return(NOEXCEPTION);
1561                 }
1562         }
1563
1564         /*
1565          * check third operand for NaN's or infinity
1566          */
1567         if (Sgl_isinfinity_exponent(opnd3)) {
1568                 if (Sgl_iszero_mantissa(opnd3)) {
1569                         /* return infinity */
1570                         Sgl_copytoptr(opnd3,dstptr);
1571                         return(NOEXCEPTION);
1572                 } else {
1573                         /*
1574                          * is NaN; signaling or quiet?
1575                          */
1576                         if (Sgl_isone_signaling(opnd3)) {
1577                                 /* trap if INVALIDTRAP enabled */
1578                                 if (Is_invalidtrap_enabled())
1579                                         return(OPC_2E_INVALIDEXCEPTION);
1580                                 /* make NaN quiet */
1581                                 Set_invalidflag();
1582                                 Sgl_set_quiet(opnd3);
1583                         }
1584                         /*
1585                          * return quiet NaN
1586                          */
1587                         Sgl_copytoptr(opnd3,dstptr);
1588                         return(NOEXCEPTION);
1589                 }
1590         }
1591
1592         /*
1593          * Generate multiply mantissa
1594          */
1595         if (Sgl_isnotzero_exponent(opnd1)) {
1596                 /* set hidden bit */
1597                 Sgl_clear_signexponent_set_hidden(opnd1);
1598         }
1599         else {
1600                 /* check for zero */
1601                 if (Sgl_iszero_mantissa(opnd1)) {
1602                         /*
1603                          * Perform the add opnd3 with zero here.
1604                          */
1605                         if (Sgl_iszero_exponentmantissa(opnd3)) {
1606                                 if (Is_rounding_mode(ROUNDMINUS)) {
1607                                         Sgl_or_signs(opnd3,resultp1);
1608                                 } else {
1609                                         Sgl_and_signs(opnd3,resultp1);
1610                                 }
1611                         }
1612                         /*
1613                          * Now let's check for trapped underflow case.
1614                          */
1615                         else if (Sgl_iszero_exponent(opnd3) &&
1616                                  Is_underflowtrap_enabled()) {
1617                                 /* need to normalize results mantissa */
1618                                 sign_save = Sgl_signextendedsign(opnd3);
1619                                 result_exponent = 0;
1620                                 Sgl_leftshiftby1(opnd3);
1621                                 Sgl_normalize(opnd3,result_exponent);
1622                                 Sgl_set_sign(opnd3,/*using*/sign_save);
1623                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
1624                                                         unfl);
1625                                 Sgl_copytoptr(opnd3,dstptr);
1626                                 /* inexact = FALSE */
1627                                 return(OPC_2E_UNDERFLOWEXCEPTION);
1628                         }
1629                         Sgl_copytoptr(opnd3,dstptr);
1630                         return(NOEXCEPTION);
1631                 }
1632                 /* is denormalized, adjust exponent */
1633                 Sgl_clear_signexponent(opnd1);
1634                 Sgl_leftshiftby1(opnd1);
1635                 Sgl_normalize(opnd1,mpy_exponent);
1636         }
1637         /* opnd2 needs to have hidden bit set with msb in hidden bit */
1638         if (Sgl_isnotzero_exponent(opnd2)) {
1639                 Sgl_clear_signexponent_set_hidden(opnd2);
1640         }
1641         else {
1642                 /* check for zero */
1643                 if (Sgl_iszero_mantissa(opnd2)) {
1644                         /*
1645                          * Perform the add opnd3 with zero here.
1646                          */
1647                         if (Sgl_iszero_exponentmantissa(opnd3)) {
1648                                 if (Is_rounding_mode(ROUNDMINUS)) {
1649                                         Sgl_or_signs(opnd3,resultp1);
1650                                 } else {
1651                                         Sgl_and_signs(opnd3,resultp1);
1652                                 }
1653                         }
1654                         /*
1655                          * Now let's check for trapped underflow case.
1656                          */
1657                         else if (Sgl_iszero_exponent(opnd3) &&
1658                             Is_underflowtrap_enabled()) {
1659                                 /* need to normalize results mantissa */
1660                                 sign_save = Sgl_signextendedsign(opnd3);
1661                                 result_exponent = 0;
1662                                 Sgl_leftshiftby1(opnd3);
1663                                 Sgl_normalize(opnd3,result_exponent);
1664                                 Sgl_set_sign(opnd3,/*using*/sign_save);
1665                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
1666                                                         unfl);
1667                                 Sgl_copytoptr(opnd3,dstptr);
1668                                 /* inexact = FALSE */
1669                                 return(OPC_2E_UNDERFLOWEXCEPTION);
1670                         }
1671                         Sgl_copytoptr(opnd3,dstptr);
1672                         return(NOEXCEPTION);
1673                 }
1674                 /* is denormalized; want to normalize */
1675                 Sgl_clear_signexponent(opnd2);
1676                 Sgl_leftshiftby1(opnd2);
1677                 Sgl_normalize(opnd2,mpy_exponent);
1678         }
1679
1680         /* Multiply the first two source mantissas together */
1681
1682         /*
1683          * The intermediate result will be kept in tmpres,
1684          * which needs enough room for the full product of the two
1685          * single mantissas, so let's call it a Single extended.
1686          */
1687         Sglext_setzero(tmpresp1,tmpresp2);
1688
1689         /*
1690          * Four bits at a time are inspected in each loop, and a
1691          * simple shift and add multiply algorithm is used.
1692          */
1693         for (count = SGL_P-1; count >= 0; count -= 4) {
1694                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1695                 if (Sbit28(opnd1)) {
1696                         /* Twoword_add should be an ADD followed by 2 ADDC's */
1697                         Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1698                 }
1699                 if (Sbit29(opnd1)) {
1700                         Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1701                 }
1702                 if (Sbit30(opnd1)) {
1703                         Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1704                 }
1705                 if (Sbit31(opnd1)) {
1706                         Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1707                 }
1708                 Sgl_rightshiftby4(opnd1);
1709         }
1710         if (Is_sexthiddenoverflow(tmpresp1)) {
1711                 /* result mantissa >= 2 (mantissa overflow) */
1712                 mpy_exponent++;
1713                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1714         } else {
1715                 Sglext_rightshiftby3(tmpresp1,tmpresp2);
1716         }
1717
1718         /*
1719          * Restore the sign of the mpy result which was saved in resultp1.
1720          * The exponent will continue to be kept in mpy_exponent.
1721          */
1722         Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1723
1724         /*
1725          * No rounding is required, since the result of the multiply
1726          * is exact in the extended format.
1727          */
1728
1729         /*
1730          * Now we are ready to perform the add portion of the operation.
1731          *
1732          * The exponents need to be kept as integers for now, since the
1733          * multiply result might not fit into the exponent field.  We
1734          * can't overflow or underflow because of this yet, since the
1735          * add could bring the final result back into range.
1736          */
1737         add_exponent = Sgl_exponent(opnd3);
1738
1739         /*
1740          * Check for denormalized or zero add operand.
1741          */
1742         if (add_exponent == 0) {
1743                 /* check for zero */
1744                 if (Sgl_iszero_mantissa(opnd3)) {
1745                         /* right is zero */
1746                         /* Left can't be zero and must be result.
1747                          *
1748                          * The final result is now in tmpres and mpy_exponent,
1749                          * and needs to be rounded and squeezed back into
1750                          * single precision format from single extended.
1751                          */
1752                         result_exponent = mpy_exponent;
1753                         Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1754                         sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1755                         goto round;
1756                 }
1757
1758                 /*
1759                  * Neither are zeroes.
1760                  * Adjust exponent and normalize add operand.
1761                  */
1762                 sign_save = Sgl_signextendedsign(opnd3);        /* save sign */
1763                 Sgl_clear_signexponent(opnd3);
1764                 Sgl_leftshiftby1(opnd3);
1765                 Sgl_normalize(opnd3,add_exponent);
1766                 Sgl_set_sign(opnd3,sign_save);          /* restore sign */
1767         } else {
1768                 Sgl_clear_exponent_set_hidden(opnd3);
1769         }
1770         /*
1771          * Copy opnd3 to the single extended variable called right.
1772          */
1773         Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1774
1775         /*
1776          * A zero "save" helps discover equal operands (for later),
1777          * and is used in swapping operands (if needed).
1778          */
1779         Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1780
1781         /*
1782          * Compare magnitude of operands.
1783          */
1784         Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1785         Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1786         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1787             Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1788                 /*
1789                  * Set the left operand to the larger one by XOR swap.
1790                  * First finish the first word "save".
1791                  */
1792                 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1793                 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1794                 Sglext_swap_lower(tmpresp2,rightp2);
1795                 /* also setup exponents used in rest of routine */
1796                 diff_exponent = add_exponent - mpy_exponent;
1797                 result_exponent = add_exponent;
1798         } else {
1799                 /* also setup exponents used in rest of routine */
1800                 diff_exponent = mpy_exponent - add_exponent;
1801                 result_exponent = mpy_exponent;
1802         }
1803         /* Invariant: left is not smaller than right. */
1804
1805         /*
1806          * Special case alignment of operands that would force alignment
1807          * beyond the extent of the extension.  A further optimization
1808          * could special case this but only reduces the path length for
1809          * this infrequent case.
1810          */
1811         if (diff_exponent > SGLEXT_THRESHOLD) {
1812                 diff_exponent = SGLEXT_THRESHOLD;
1813         }
1814
1815         /* Align right operand by shifting it to the right */
1816         Sglext_clear_sign(rightp1);
1817         Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1818
1819         /* Treat sum and difference of the operands separately. */
1820         if ((int)save < 0) {
1821                 /*
1822                  * Difference of the two operands.  Overflow can occur if the
1823                  * multiply overflowed.  A borrow can occur out of the hidden
1824                  * bit and force a post normalization phase.
1825                  */
1826                 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1827                         resultp1,resultp2);
1828                 sign_save = Sgl_signextendedsign(resultp1);
1829                 if (Sgl_iszero_hidden(resultp1)) {
1830                         /* Handle normalization */
1831                 /* A straightforward algorithm would now shift the
1832                  * result and extension left until the hidden bit
1833                  * becomes one.  Not all of the extension bits need
1834                  * participate in the shift.  Only the two most
1835                  * significant bits (round and guard) are needed.
1836                  * If only a single shift is needed then the guard
1837                  * bit becomes a significant low order bit and the
1838                  * extension must participate in the rounding.
1839                  * If more than a single shift is needed, then all
1840                  * bits to the right of the guard bit are zeros,
1841                  * and the guard bit may or may not be zero. */
1842                         Sglext_leftshiftby1(resultp1,resultp2);
1843
1844                         /* Need to check for a zero result.  The sign and
1845                          * exponent fields have already been zeroed.  The more
1846                          * efficient test of the full object can be used.
1847                          */
1848                          if (Sglext_iszero(resultp1,resultp2)) {
1849                                 /* Must have been "x-x" or "x+(-x)". */
1850                                 if (Is_rounding_mode(ROUNDMINUS))
1851                                         Sgl_setone_sign(resultp1);
1852                                 Sgl_copytoptr(resultp1,dstptr);
1853                                 return(NOEXCEPTION);
1854                         }
1855                         result_exponent--;
1856
1857                         /* Look to see if normalization is finished. */
1858                         if (Sgl_isone_hidden(resultp1)) {
1859                                 /* No further normalization is needed */
1860                                 goto round;
1861                         }
1862
1863                         /* Discover first one bit to determine shift amount.
1864                          * Use a modified binary search.  We have already
1865                          * shifted the result one position left and still
1866                          * not found a one, so the remainder of the extension
1867                          * must be zero, which simplifies rounding. */
1868                         /* Scan bytes */
1869                         while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1870                                 Sglext_leftshiftby8(resultp1,resultp2);
1871                                 result_exponent -= 8;
1872                         }
1873                         /* Now narrow it down to the nibble */
1874                         if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1875                                 /* The lower nibble contains the
1876                                  * normalizing one */
1877                                 Sglext_leftshiftby4(resultp1,resultp2);
1878                                 result_exponent -= 4;
1879                         }
1880                         /* Select case where first bit is set (already
1881                          * normalized) otherwise select the proper shift. */
1882                         jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1883                         if (jumpsize <= 7) switch(jumpsize) {
1884                         case 1:
1885                                 Sglext_leftshiftby3(resultp1,resultp2);
1886                                 result_exponent -= 3;
1887                                 break;
1888                         case 2:
1889                         case 3:
1890                                 Sglext_leftshiftby2(resultp1,resultp2);
1891                                 result_exponent -= 2;
1892                                 break;
1893                         case 4:
1894                         case 5:
1895                         case 6:
1896                         case 7:
1897                                 Sglext_leftshiftby1(resultp1,resultp2);
1898                                 result_exponent -= 1;
1899                                 break;
1900                         }
1901                 } /* end if (hidden...)... */
1902         /* Fall through and round */
1903         } /* end if (save < 0)... */
1904         else {
1905                 /* Add magnitudes */
1906                 Sglext_addition(tmpresp1,tmpresp2,
1907                         rightp1,rightp2, /*to*/resultp1,resultp2);
1908                 sign_save = Sgl_signextendedsign(resultp1);
1909                 if (Sgl_isone_hiddenoverflow(resultp1)) {
1910                         /* Prenormalization required. */
1911                         Sglext_arithrightshiftby1(resultp1,resultp2);
1912                         result_exponent++;
1913                 } /* end if hiddenoverflow... */
1914         } /* end else ...add magnitudes... */
1915
1916         /* Round the result.  If the extension word (resultp2) is
1917          * all zeros, then the result is exact.  Otherwise round in the
1918          * correct direction.  Underflow is possible.  If a postnormalization
1919          * is necessary, then the mantissa is all zeros so no shift is needed.
1920          */
1921   round:
1922         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1923                 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1924         }
1925         Sgl_set_sign(resultp1,/*using*/sign_save);
1926         if (Sglext_isnotzero_mantissap2(resultp2)) {
1927                 inexact = TRUE;
1928                 switch(Rounding_mode()) {
1929                 case ROUNDNEAREST: /* The default. */
1930                         if (Sglext_isone_highp2(resultp2)) {
1931                                 /* at least 1/2 ulp */
1932                                 if (Sglext_isnotzero_low31p2(resultp2) ||
1933                                     Sglext_isone_lowp1(resultp1)) {
1934                                         /* either exactly half way and odd or
1935                                          * more than 1/2ulp */
1936                                         Sgl_increment(resultp1);
1937                                 }
1938                         }
1939                         break;
1940
1941                 case ROUNDPLUS:
1942                         if (Sgl_iszero_sign(resultp1)) {
1943                                 /* Round up positive results */
1944                                 Sgl_increment(resultp1);
1945                         }
1946                         break;
1947
1948                 case ROUNDMINUS:
1949                         if (Sgl_isone_sign(resultp1)) {
1950                                 /* Round down negative results */
1951                                 Sgl_increment(resultp1);
1952                         }
1953
1954                 case ROUNDZERO:;
1955                         /* truncate is simple */
1956                 } /* end switch... */
1957                 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1958         }
1959         if (result_exponent >= SGL_INFINITY_EXPONENT) {
1960                 /* Overflow */
1961                 if (Is_overflowtrap_enabled()) {
1962                         /*
1963                          * Adjust bias of result
1964                          */
1965                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1966                         Sgl_copytoptr(resultp1,dstptr);
1967                         if (inexact)
1968                             if (Is_inexacttrap_enabled())
1969                                 return (OPC_2E_OVERFLOWEXCEPTION |
1970                                         OPC_2E_INEXACTEXCEPTION);
1971                             else Set_inexactflag();
1972                         return (OPC_2E_OVERFLOWEXCEPTION);
1973                 }
1974                 inexact = TRUE;
1975                 Set_overflowflag();
1976                 Sgl_setoverflow(resultp1);
1977         } else if (result_exponent <= 0) {      /* underflow case */
1978                 if (Is_underflowtrap_enabled()) {
1979                         /*
1980                          * Adjust bias of result
1981                          */
1982                         Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1983                         Sgl_copytoptr(resultp1,dstptr);
1984                         if (inexact)
1985                             if (Is_inexacttrap_enabled())
1986                                 return (OPC_2E_UNDERFLOWEXCEPTION |
1987                                         OPC_2E_INEXACTEXCEPTION);
1988                             else Set_inexactflag();
1989                         return(OPC_2E_UNDERFLOWEXCEPTION);
1990                 }
1991                 else if (inexact && is_tiny) Set_underflowflag();
1992         }
1993         else Sgl_set_exponent(resultp1,result_exponent);
1994         Sgl_copytoptr(resultp1,dstptr);
1995         if (inexact)
1996                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1997                 else Set_inexactflag();
1998         return(NOEXCEPTION);
1999 }
2000
2001 /*
2002  *  Single Floating-point Multiply Negate Fused Add
2003  */
2004
2005 sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2006
2007 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2008 unsigned int *status;
2009 {
2010         unsigned int opnd1, opnd2, opnd3;
2011         register unsigned int tmpresp1, tmpresp2;
2012         unsigned int rightp1, rightp2;
2013         unsigned int resultp1, resultp2 = 0;
2014         register int mpy_exponent, add_exponent, count;
2015         boolean inexact = FALSE, is_tiny = FALSE;
2016
2017         unsigned int signlessleft1, signlessright1, save;
2018         register int result_exponent, diff_exponent;
2019         int sign_save, jumpsize;
2020
2021         Sgl_copyfromptr(src1ptr,opnd1);
2022         Sgl_copyfromptr(src2ptr,opnd2);
2023         Sgl_copyfromptr(src3ptr,opnd3);
2024
2025         /*
2026          * set sign bit of result of multiply
2027          */
2028         if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2029                 Sgl_setzero(resultp1);
2030         else
2031                 Sgl_setnegativezero(resultp1);
2032
2033         /*
2034          * Generate multiply exponent
2035          */
2036         mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2037
2038         /*
2039          * check first operand for NaN's or infinity
2040          */
2041         if (Sgl_isinfinity_exponent(opnd1)) {
2042                 if (Sgl_iszero_mantissa(opnd1)) {
2043                         if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2044                                 if (Sgl_iszero_exponentmantissa(opnd2)) {
2045                                         /*
2046                                          * invalid since operands are infinity
2047                                          * and zero
2048                                          */
2049                                         if (Is_invalidtrap_enabled())
2050                                                 return(OPC_2E_INVALIDEXCEPTION);
2051                                         Set_invalidflag();
2052                                         Sgl_makequietnan(resultp1);
2053                                         Sgl_copytoptr(resultp1,dstptr);
2054                                         return(NOEXCEPTION);
2055                                 }
2056                                 /*
2057                                  * Check third operand for infinity with a
2058                                  *  sign opposite of the multiply result
2059                                  */
2060                                 if (Sgl_isinfinity(opnd3) &&
2061                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2062                                         /*
2063                                          * invalid since attempting a magnitude
2064                                          * subtraction of infinities
2065                                          */
2066                                         if (Is_invalidtrap_enabled())
2067                                                 return(OPC_2E_INVALIDEXCEPTION);
2068                                         Set_invalidflag();
2069                                         Sgl_makequietnan(resultp1);
2070                                         Sgl_copytoptr(resultp1,dstptr);
2071                                         return(NOEXCEPTION);
2072                                 }
2073
2074                                 /*
2075                                  * return infinity
2076                                  */
2077                                 Sgl_setinfinity_exponentmantissa(resultp1);
2078                                 Sgl_copytoptr(resultp1,dstptr);
2079                                 return(NOEXCEPTION);
2080                         }
2081                 }
2082                 else {
2083                         /*
2084                          * is NaN; signaling or quiet?
2085                          */
2086                         if (Sgl_isone_signaling(opnd1)) {
2087                                 /* trap if INVALIDTRAP enabled */
2088                                 if (Is_invalidtrap_enabled())
2089                                         return(OPC_2E_INVALIDEXCEPTION);
2090                                 /* make NaN quiet */
2091                                 Set_invalidflag();
2092                                 Sgl_set_quiet(opnd1);
2093                         }
2094                         /*
2095                          * is second operand a signaling NaN?
2096                          */
2097                         else if (Sgl_is_signalingnan(opnd2)) {
2098                                 /* trap if INVALIDTRAP enabled */
2099                                 if (Is_invalidtrap_enabled())
2100                                         return(OPC_2E_INVALIDEXCEPTION);
2101                                 /* make NaN quiet */
2102                                 Set_invalidflag();
2103                                 Sgl_set_quiet(opnd2);
2104                                 Sgl_copytoptr(opnd2,dstptr);
2105                                 return(NOEXCEPTION);
2106                         }
2107                         /*
2108                          * is third operand a signaling NaN?
2109                          */
2110                         else if (Sgl_is_signalingnan(opnd3)) {
2111                                 /* trap if INVALIDTRAP enabled */
2112                                 if (Is_invalidtrap_enabled())
2113                                         return(OPC_2E_INVALIDEXCEPTION);
2114                                 /* make NaN quiet */
2115                                 Set_invalidflag();
2116                                 Sgl_set_quiet(opnd3);
2117                                 Sgl_copytoptr(opnd3,dstptr);
2118                                 return(NOEXCEPTION);
2119                         }
2120                         /*
2121                          * return quiet NaN
2122                          */
2123                         Sgl_copytoptr(opnd1,dstptr);
2124                         return(NOEXCEPTION);
2125                 }
2126         }
2127
2128         /*
2129          * check second operand for NaN's or infinity
2130          */
2131         if (Sgl_isinfinity_exponent(opnd2)) {
2132                 if (Sgl_iszero_mantissa(opnd2)) {
2133                         if (Sgl_isnotnan(opnd3)) {
2134                                 if (Sgl_iszero_exponentmantissa(opnd1)) {
2135                                         /*
2136                                          * invalid since multiply operands are
2137                                          * zero & infinity
2138                                          */
2139                                         if (Is_invalidtrap_enabled())
2140                                                 return(OPC_2E_INVALIDEXCEPTION);
2141                                         Set_invalidflag();
2142                                         Sgl_makequietnan(opnd2);
2143                                         Sgl_copytoptr(opnd2,dstptr);
2144                                         return(NOEXCEPTION);
2145                                 }
2146
2147                                 /*
2148                                  * Check third operand for infinity with a
2149                                  *  sign opposite of the multiply result
2150                                  */
2151                                 if (Sgl_isinfinity(opnd3) &&
2152                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2153                                         /*
2154                                          * invalid since attempting a magnitude
2155                                          * subtraction of infinities
2156                                          */
2157                                         if (Is_invalidtrap_enabled())
2158                                                 return(OPC_2E_INVALIDEXCEPTION);
2159                                         Set_invalidflag();
2160                                         Sgl_makequietnan(resultp1);
2161                                         Sgl_copytoptr(resultp1,dstptr);
2162                                         return(NOEXCEPTION);
2163                                 }
2164
2165                                 /*
2166                                  * return infinity
2167                                  */
2168                                 Sgl_setinfinity_exponentmantissa(resultp1);
2169                                 Sgl_copytoptr(resultp1,dstptr);
2170                                 return(NOEXCEPTION);
2171                         }
2172                 }
2173                 else {
2174                         /*
2175                          * is NaN; signaling or quiet?
2176                          */
2177                         if (Sgl_isone_signaling(opnd2)) {
2178                                 /* trap if INVALIDTRAP enabled */
2179                                 if (Is_invalidtrap_enabled())
2180                                         return(OPC_2E_INVALIDEXCEPTION);
2181                                 /* make NaN quiet */
2182                                 Set_invalidflag();
2183                                 Sgl_set_quiet(opnd2);
2184                         }
2185                         /*
2186                          * is third operand a signaling NaN?
2187                          */
2188                         else if (Sgl_is_signalingnan(opnd3)) {
2189                                 /* trap if INVALIDTRAP enabled */
2190                                 if (Is_invalidtrap_enabled())
2191                                                 return(OPC_2E_INVALIDEXCEPTION);
2192                                 /* make NaN quiet */
2193                                 Set_invalidflag();
2194                                 Sgl_set_quiet(opnd3);
2195                                 Sgl_copytoptr(opnd3,dstptr);
2196                                 return(NOEXCEPTION);
2197                         }
2198                         /*
2199                          * return quiet NaN
2200                          */
2201                         Sgl_copytoptr(opnd2,dstptr);
2202                         return(NOEXCEPTION);
2203                 }
2204         }
2205
2206         /*
2207          * check third operand for NaN's or infinity
2208          */
2209         if (Sgl_isinfinity_exponent(opnd3)) {
2210                 if (Sgl_iszero_mantissa(opnd3)) {
2211                         /* return infinity */
2212                         Sgl_copytoptr(opnd3,dstptr);
2213                         return(NOEXCEPTION);
2214                 } else {
2215                         /*
2216                          * is NaN; signaling or quiet?
2217                          */
2218                         if (Sgl_isone_signaling(opnd3)) {
2219                                 /* trap if INVALIDTRAP enabled */
2220                                 if (Is_invalidtrap_enabled())
2221                                         return(OPC_2E_INVALIDEXCEPTION);
2222                                 /* make NaN quiet */
2223                                 Set_invalidflag();
2224                                 Sgl_set_quiet(opnd3);
2225                         }
2226                         /*
2227                          * return quiet NaN
2228                          */
2229                         Sgl_copytoptr(opnd3,dstptr);
2230                         return(NOEXCEPTION);
2231                 }
2232         }
2233
2234         /*
2235          * Generate multiply mantissa
2236          */
2237         if (Sgl_isnotzero_exponent(opnd1)) {
2238                 /* set hidden bit */
2239                 Sgl_clear_signexponent_set_hidden(opnd1);
2240         }
2241         else {
2242                 /* check for zero */
2243                 if (Sgl_iszero_mantissa(opnd1)) {
2244                         /*
2245                          * Perform the add opnd3 with zero here.
2246                          */
2247                         if (Sgl_iszero_exponentmantissa(opnd3)) {
2248                                 if (Is_rounding_mode(ROUNDMINUS)) {
2249                                         Sgl_or_signs(opnd3,resultp1);
2250                                 } else {
2251                                         Sgl_and_signs(opnd3,resultp1);
2252                                 }
2253                         }
2254                         /*
2255                          * Now let's check for trapped underflow case.
2256                          */
2257                         else if (Sgl_iszero_exponent(opnd3) &&
2258                                  Is_underflowtrap_enabled()) {
2259                                 /* need to normalize results mantissa */
2260                                 sign_save = Sgl_signextendedsign(opnd3);
2261                                 result_exponent = 0;
2262                                 Sgl_leftshiftby1(opnd3);
2263                                 Sgl_normalize(opnd3,result_exponent);
2264                                 Sgl_set_sign(opnd3,/*using*/sign_save);
2265                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
2266                                                         unfl);
2267                                 Sgl_copytoptr(opnd3,dstptr);
2268                                 /* inexact = FALSE */
2269                                 return(OPC_2E_UNDERFLOWEXCEPTION);
2270                         }
2271                         Sgl_copytoptr(opnd3,dstptr);
2272                         return(NOEXCEPTION);
2273                 }
2274                 /* is denormalized, adjust exponent */
2275                 Sgl_clear_signexponent(opnd1);
2276                 Sgl_leftshiftby1(opnd1);
2277                 Sgl_normalize(opnd1,mpy_exponent);
2278         }
2279         /* opnd2 needs to have hidden bit set with msb in hidden bit */
2280         if (Sgl_isnotzero_exponent(opnd2)) {
2281                 Sgl_clear_signexponent_set_hidden(opnd2);
2282         }
2283         else {
2284                 /* check for zero */
2285                 if (Sgl_iszero_mantissa(opnd2)) {
2286                         /*
2287                          * Perform the add opnd3 with zero here.
2288                          */
2289                         if (Sgl_iszero_exponentmantissa(opnd3)) {
2290                                 if (Is_rounding_mode(ROUNDMINUS)) {
2291                                         Sgl_or_signs(opnd3,resultp1);
2292                                 } else {
2293                                         Sgl_and_signs(opnd3,resultp1);
2294                                 }
2295                         }
2296                         /*
2297                          * Now let's check for trapped underflow case.
2298                          */
2299                         else if (Sgl_iszero_exponent(opnd3) &&
2300                             Is_underflowtrap_enabled()) {
2301                                 /* need to normalize results mantissa */
2302                                 sign_save = Sgl_signextendedsign(opnd3);
2303                                 result_exponent = 0;
2304                                 Sgl_leftshiftby1(opnd3);
2305                                 Sgl_normalize(opnd3,result_exponent);
2306                                 Sgl_set_sign(opnd3,/*using*/sign_save);
2307                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
2308                                                         unfl);
2309                                 Sgl_copytoptr(opnd3,dstptr);
2310                                 /* inexact = FALSE */
2311                                 return(OPC_2E_UNDERFLOWEXCEPTION);
2312                         }
2313                         Sgl_copytoptr(opnd3,dstptr);
2314                         return(NOEXCEPTION);
2315                 }
2316                 /* is denormalized; want to normalize */
2317                 Sgl_clear_signexponent(opnd2);
2318                 Sgl_leftshiftby1(opnd2);
2319                 Sgl_normalize(opnd2,mpy_exponent);
2320         }
2321
2322         /* Multiply the first two source mantissas together */
2323
2324         /*
2325          * The intermediate result will be kept in tmpres, which needs
2326          * enough room for the double-width product of the two
2327          * single-precision mantissas, so let's call it a Single extended.
2328          */
2329         Sglext_setzero(tmpresp1,tmpresp2);
2330
2331         /*
2332          * Four bits at a time are inspected in each loop, and a
2333          * simple shift and add multiply algorithm is used.
2334          */
2335         for (count = SGL_P-1; count >= 0; count -= 4) {
2336                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2337                 if (Sbit28(opnd1)) {
2338                         /* Twoword_add should be an ADD followed by 2 ADDC's */
2339                         Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2340                 }
2341                 if (Sbit29(opnd1)) {
2342                         Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2343                 }
2344                 if (Sbit30(opnd1)) {
2345                         Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2346                 }
2347                 if (Sbit31(opnd1)) {
2348                         Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2349                 }
2350                 Sgl_rightshiftby4(opnd1);
2351         }
2352         if (Is_sexthiddenoverflow(tmpresp1)) {
2353                 /* result mantissa >= 2 (mantissa overflow) */
2354                 mpy_exponent++;
2355                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2356         } else {
2357                 Sglext_rightshiftby3(tmpresp1,tmpresp2);
2358         }
2359
2360         /*
2361          * Restore the sign of the mpy result which was saved in resultp1.
2362          * The exponent will continue to be kept in mpy_exponent.
2363          */
2364         Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2365
2366         /*
2367          * No rounding is required, since the result of the multiply
2368          * is exact in the extended format.
2369          */
2370
2371         /*
2372          * Now we are ready to perform the add portion of the operation.
2373          *
2374          * The exponents need to be kept as integers for now, since the
2375          * multiply result might not fit into the exponent field.  We
2376          * can't overflow or underflow because of this yet, since the
2377          * add could bring the final result back into range.
2378          */
2379         add_exponent = Sgl_exponent(opnd3);
2380
2381         /*
2382          * Check for denormalized or zero add operand.
2383          */
2384         if (add_exponent == 0) {
2385                 /* check for zero */
2386                 if (Sgl_iszero_mantissa(opnd3)) {
2387                         /* right is zero */
2388                         /* Left can't be zero and must be result.
2389                          *
2390                          * The final result is now in tmpres and mpy_exponent,
2391                          * and needs to be rounded and squeezed back into
2392                          * single precision format from single extended.
2393                          */
2394                         result_exponent = mpy_exponent;
2395                         Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2396                         sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2397                         goto round;
2398                 }
2399
2400                 /*
2401                  * Neither are zeroes.
2402                  * Adjust exponent and normalize add operand.
2403                  */
2404                 sign_save = Sgl_signextendedsign(opnd3);        /* save sign */
2405                 Sgl_clear_signexponent(opnd3);
2406                 Sgl_leftshiftby1(opnd3);
2407                 Sgl_normalize(opnd3,add_exponent);
2408                 Sgl_set_sign(opnd3,sign_save);          /* restore sign */
2409         } else {
2410                 Sgl_clear_exponent_set_hidden(opnd3);
2411         }
2412         /*
2413          * Copy opnd3 to the single extended variable called right.
2414          */
2415         Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2416
2417         /*
2418          * A zero "save" helps discover equal operands (for later),
2419          * and is used in swapping operands (if needed).
2420          */
2421         Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2422
2423         /*
2424          * Compare magnitude of operands.
2425          */
2426         Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2427         Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2428         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2429             Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2430                 /*
2431                  * Set the left operand to the larger one by XOR swap.
2432                  * First finish the first word "save".
2433                  */
2434                 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2435                 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2436                 Sglext_swap_lower(tmpresp2,rightp2);
2437                 /* also setup exponents used in rest of routine */
2438                 diff_exponent = add_exponent - mpy_exponent;
2439                 result_exponent = add_exponent;
2440         } else {
2441                 /* also setup exponents used in rest of routine */
2442                 diff_exponent = mpy_exponent - add_exponent;
2443                 result_exponent = mpy_exponent;
2444         }
2445         /* Invariant: left is not smaller than right. */
2446
2447         /*
2448          * Special case alignment of operands that would force alignment
2449          * beyond the extent of the extension.  A further optimization
2450          * could special case this but only reduces the path length for
2451          * this infrequent case.
2452          */
2453         if (diff_exponent > SGLEXT_THRESHOLD) {
2454                 diff_exponent = SGLEXT_THRESHOLD;
2455         }
2456
2457         /* Align right operand by shifting it to the right */
2458         Sglext_clear_sign(rightp1);
2459         Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2460
2461         /* Treat sum and difference of the operands separately. */
2462         if ((int)save < 0) {
2463                 /*
2464                  * Difference of the two operands.  Overflow can occur if the
2465                  * multiply overflowed.  A borrow can occur out of the hidden
2466                  * bit and force a post normalization phase.
2467                  */
2468                 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2469                         resultp1,resultp2);
2470                 sign_save = Sgl_signextendedsign(resultp1);
2471                 if (Sgl_iszero_hidden(resultp1)) {
2472                         /* Handle normalization */
2473                 /* A straightforward algorithm would now shift the
2474                  * result and extension left until the hidden bit
2475                  * becomes one.  Not all of the extension bits need
2476                  * participate in the shift.  Only the two most
2477                  * significant bits (round and guard) are needed.
2478                  * If only a single shift is needed then the guard
2479                  * bit becomes a significant low order bit and the
2480                  * extension must participate in the rounding.
2481                  * If more than a single shift is needed, then all
2482                  * bits to the right of the guard bit are zeros,
2483                  * and the guard bit may or may not be zero. */
2484                         Sglext_leftshiftby1(resultp1,resultp2);
2485
2486                         /* Need to check for a zero result.  The sign and
2487                          * exponent fields have already been zeroed.  The more
2488                          * efficient test of the full object can be used.
2489                          */
2490                          if (Sglext_iszero(resultp1,resultp2)) {
2491                                 /* Must have been "x-x" or "x+(-x)". */
2492                                 if (Is_rounding_mode(ROUNDMINUS))
2493                                         Sgl_setone_sign(resultp1);
2494                                 Sgl_copytoptr(resultp1,dstptr);
2495                                 return(NOEXCEPTION);
2496                         }
2497                         result_exponent--;
2498
2499                         /* Look to see if normalization is finished. */
2500                         if (Sgl_isone_hidden(resultp1)) {
2501                                 /* No further normalization is needed */
2502                                 goto round;
2503                         }
2504
2505                         /* Discover first one bit to determine shift amount.
2506                          * Use a modified binary search.  We have already
2507                          * shifted the result one position left and still
2508                          * not found a one so the remainder of the extension
2509                          * must be zero and simplifies rounding. */
2510                         /* Scan bytes */
2511                         while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2512                                 Sglext_leftshiftby8(resultp1,resultp2);
2513                                 result_exponent -= 8;
2514                         }
2515                         /* Now narrow it down to the nibble */
2516                         if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2517                                 /* The lower nibble contains the
2518                                  * normalizing one */
2519                                 Sglext_leftshiftby4(resultp1,resultp2);
2520                                 result_exponent -= 4;
2521                         }
2522                         /* Select case where first bit is set (already
2523                          * normalized) otherwise select the proper shift. */
2524                         jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2525                         if (jumpsize <= 7) switch(jumpsize) {
2526                         case 1:
2527                                 Sglext_leftshiftby3(resultp1,resultp2);
2528                                 result_exponent -= 3;
2529                                 break;
2530                         case 2:
2531                         case 3:
2532                                 Sglext_leftshiftby2(resultp1,resultp2);
2533                                 result_exponent -= 2;
2534                                 break;
2535                         case 4:
2536                         case 5:
2537                         case 6:
2538                         case 7:
2539                                 Sglext_leftshiftby1(resultp1,resultp2);
2540                                 result_exponent -= 1;
2541                                 break;
2542                         }
2543                 } /* end if (hidden...)... */
2544         /* Fall through and round */
2545         } /* end if (save < 0)... */
2546         else {
2547                 /* Add magnitudes */
2548                 Sglext_addition(tmpresp1,tmpresp2,
2549                         rightp1,rightp2, /*to*/resultp1,resultp2);
2550                 sign_save = Sgl_signextendedsign(resultp1);
2551                 if (Sgl_isone_hiddenoverflow(resultp1)) {
2552                         /* Prenormalization required. */
2553                         Sglext_arithrightshiftby1(resultp1,resultp2);
2554                         result_exponent++;
2555                 } /* end if hiddenoverflow... */
2556         } /* end else ...add magnitudes... */
2557
2558         /* Round the result.  If the extension and lower two words are
2559          * all zeros, then the result is exact.  Otherwise round in the
2560          * correct direction.  Underflow is possible. If a postnormalization
2561          * is necessary, then the mantissa is all zeros so no shift is needed.
2562          */
2563   round:
2564         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2565                 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2566         }
2567         Sgl_set_sign(resultp1,/*using*/sign_save);
2568         if (Sglext_isnotzero_mantissap2(resultp2)) {
2569                 inexact = TRUE;
2570                 switch(Rounding_mode()) {
2571                 case ROUNDNEAREST: /* The default. */
2572                         if (Sglext_isone_highp2(resultp2)) {
2573                                 /* at least 1/2 ulp */
2574                                 if (Sglext_isnotzero_low31p2(resultp2) ||
2575                                     Sglext_isone_lowp1(resultp1)) {
2576                                         /* either exactly half way and odd or
2577                                          * more than 1/2ulp */
2578                                         Sgl_increment(resultp1);
2579                                 }
2580                         }
2581                         break;
2582
2583                 case ROUNDPLUS:
2584                         if (Sgl_iszero_sign(resultp1)) {
2585                                 /* Round up positive results */
2586                                 Sgl_increment(resultp1);
2587                         }
2588                         break;
2589
2590                 case ROUNDMINUS:
2591                         if (Sgl_isone_sign(resultp1)) {
2592                                 /* Round down negative results */
2593                                 Sgl_increment(resultp1);
2594                         }
2595
2596                 case ROUNDZERO:;
2597                         /* truncate is simple */
2598                 } /* end switch... */
2599                 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2600         }
2601         if (result_exponent >= SGL_INFINITY_EXPONENT) {
2602                 /* Overflow */
2603                 if (Is_overflowtrap_enabled()) {
2604                         /*
2605                          * Adjust bias of result
2606                          */
2607                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2608                         Sgl_copytoptr(resultp1,dstptr);
2609                         if (inexact)
2610                             if (Is_inexacttrap_enabled())
2611                                 return (OPC_2E_OVERFLOWEXCEPTION |
2612                                         OPC_2E_INEXACTEXCEPTION);
2613                             else Set_inexactflag();
2614                         return (OPC_2E_OVERFLOWEXCEPTION);
2615                 }
2616                 inexact = TRUE;
2617                 Set_overflowflag();
2618                 Sgl_setoverflow(resultp1);
2619         } else if (result_exponent <= 0) {      /* underflow case */
2620                 if (Is_underflowtrap_enabled()) {
2621                         /*
2622                          * Adjust bias of result
2623                          */
2624                         Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2625                         Sgl_copytoptr(resultp1,dstptr);
2626                         if (inexact)
2627                             if (Is_inexacttrap_enabled())
2628                                 return (OPC_2E_UNDERFLOWEXCEPTION |
2629                                         OPC_2E_INEXACTEXCEPTION);
2630                             else Set_inexactflag();
2631                         return(OPC_2E_UNDERFLOWEXCEPTION);
2632                 }
2633                 else if (inexact && is_tiny) Set_underflowflag();
2634         }
2635         else Sgl_set_exponent(resultp1,result_exponent);
2636         Sgl_copytoptr(resultp1,dstptr);
2637         if (inexact)
2638                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2639                 else Set_inexactflag();
2640         return(NOEXCEPTION);
2641 }
2642