arch/parisc/math-emu/fmpyfadd.c

   1 /*
   2  * Linux/PA-RISC Project (http://www.parisc-linux.org/)
   3  *
   4  * Floating-point emulation code
   5  *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
   6  *
   7  *    This program is free software; you can redistribute it and/or modify
   8  *    it under the terms of the GNU General Public License as published by
   9  *    the Free Software Foundation; either version 2, or (at your option)
  10  *    any later version.
  11  *
  12  *    This program is distributed in the hope that it will be useful,
  13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *    GNU General Public License for more details.
  16  *
  17  *    You should have received a copy of the GNU General Public License
  18  *    along with this program; if not, write to the Free Software
  19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21 /*
  22  * BEGIN_DESC
  23  *
  24  *  File:
  25  *      @(#)    pa/spmath/fmpyfadd.c            $Revision: 1.1 $
  26  *
  27  *  Purpose:
  28  *      Double Floating-point Multiply Fused Add
  29  *      Double Floating-point Multiply Negate Fused Add
  30  *      Single Floating-point Multiply Fused Add
  31  *      Single Floating-point Multiply Negate Fused Add
  32  *
  33  *  External Interfaces:
  34  *      dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  35  *      dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  36  *      sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  37  *      sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  38  *
  39  *  Internal Interfaces:
  40  *
  41  *  Theory:
  42  *      <<please update with an overview of the operation of this file>>
  43  *
  44  * END_DESC
  45 */
  46
  47
  48 #include "float.h"
  49 #include "sgl_float.h"
  50 #include "dbl_float.h"
  51
  52
  53 /*
  54  *  Double Floating-point Multiply Fused Add
  55  */
  56
  57 int
  58 dbl_fmpyfadd(
  59             dbl_floating_point *src1ptr,
  60             dbl_floating_point *src2ptr,
  61             dbl_floating_point *src3ptr,
  62             unsigned int *status,
  63             dbl_floating_point *dstptr)
  64 {
  65         unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
  66         register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
  67         unsigned int rightp1, rightp2, rightp3, rightp4;
  68         unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
  69         register int mpy_exponent, add_exponent, count;
  70         boolean inexact = FALSE, is_tiny = FALSE;
  71
  72         unsigned int signlessleft1, signlessright1, save;
  73         register int result_exponent, diff_exponent;
  74         int sign_save, jumpsize;
  75
  76         Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
  77         Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
  78         Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
  79
  80         /*
  81          * set sign bit of result of multiply
  82          */
  83         if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
  84                 Dbl_setnegativezerop1(resultp1);
  85         else Dbl_setzerop1(resultp1);
  86
  87         /*
  88          * Generate multiply exponent
  89          */
  90         mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
  91
  92         /*
  93          * check first operand for NaN's or infinity
  94          */
  95         if (Dbl_isinfinity_exponent(opnd1p1)) {
  96                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  97                         if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
  98                             Dbl_isnotnan(opnd3p1,opnd3p2)) {
  99                                 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
 100                                         /*
 101                                          * invalid since operands are infinity
 102                                          * and zero
 103                                          */
 104                                         if (Is_invalidtrap_enabled())
 105                                                 return(OPC_2E_INVALIDEXCEPTION);
 106                                         Set_invalidflag();
 107                                         Dbl_makequietnan(resultp1,resultp2);
 108                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 109                                         return(NOEXCEPTION);
 110                                 }
 111                                 /*
 112                                  * Check third operand for infinity with a
 113                                  *  sign opposite of the multiply result
 114                                  */
 115                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 116                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 117                                         /*
 118                                          * invalid since attempting a magnitude
 119                                          * subtraction of infinities
 120                                          */
 121                                         if (Is_invalidtrap_enabled())
 122                                                 return(OPC_2E_INVALIDEXCEPTION);
 123                                         Set_invalidflag();
 124                                         Dbl_makequietnan(resultp1,resultp2);
 125                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 126                                         return(NOEXCEPTION);
 127                                 }
 128
 129                                 /*
 130                                  * return infinity
 131                                  */
 132                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 133                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 134                                 return(NOEXCEPTION);
 135                         }
 136                 }
 137                 else {
 138                         /*
 139                          * is NaN; signaling or quiet?
 140                          */
 141                         if (Dbl_isone_signaling(opnd1p1)) {
 142                                 /* trap if INVALIDTRAP enabled */
 143                                 if (Is_invalidtrap_enabled())
 144                                         return(OPC_2E_INVALIDEXCEPTION);
 145                                 /* make NaN quiet */
 146                                 Set_invalidflag();
 147                                 Dbl_set_quiet(opnd1p1);
 148                         }
 149                         /*
 150                          * is second operand a signaling NaN?
 151                          */
 152                         else if (Dbl_is_signalingnan(opnd2p1)) {
 153                                 /* trap if INVALIDTRAP enabled */
 154                                 if (Is_invalidtrap_enabled())
 155                                         return(OPC_2E_INVALIDEXCEPTION);
 156                                 /* make NaN quiet */
 157                                 Set_invalidflag();
 158                                 Dbl_set_quiet(opnd2p1);
 159                                 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 160                                 return(NOEXCEPTION);
 161                         }
 162                         /*
 163                          * is third operand a signaling NaN?
 164                          */
 165                         else if (Dbl_is_signalingnan(opnd3p1)) {
 166                                 /* trap if INVALIDTRAP enabled */
 167                                 if (Is_invalidtrap_enabled())
 168                                         return(OPC_2E_INVALIDEXCEPTION);
 169                                 /* make NaN quiet */
 170                                 Set_invalidflag();
 171                                 Dbl_set_quiet(opnd3p1);
 172                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 173                                 return(NOEXCEPTION);
 174                         }
 175                         /*
 176                          * return quiet NaN
 177                          */
 178                         Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
 179                         return(NOEXCEPTION);
 180                 }
 181         }
 182
 183         /*
 184          * check second operand for NaN's or infinity
 185          */
 186         if (Dbl_isinfinity_exponent(opnd2p1)) {
 187                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 188                         if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
 189                                 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
 190                                         /*
 191                                          * invalid since multiply operands are
 192                                          * zero & infinity
 193                                          */
 194                                         if (Is_invalidtrap_enabled())
 195                                                 return(OPC_2E_INVALIDEXCEPTION);
 196                                         Set_invalidflag();
 197                                         Dbl_makequietnan(opnd2p1,opnd2p2);
 198                                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 199                                         return(NOEXCEPTION);
 200                                 }
 201
 202                                 /*
 203                                  * Check third operand for infinity with a
 204                                  *  sign opposite of the multiply result
 205                                  */
 206                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 207                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 208                                         /*
 209                                          * invalid since attempting a magnitude
 210                                          * subtraction of infinities
 211                                          */
 212                                         if (Is_invalidtrap_enabled())
 213                                                 return(OPC_2E_INVALIDEXCEPTION);
 214                                         Set_invalidflag();
 215                                         Dbl_makequietnan(resultp1,resultp2);
 216                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 217                                         return(NOEXCEPTION);
 218                                 }
 219
 220                                 /*
 221                                  * return infinity
 222                                  */
 223                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 224                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 225                                 return(NOEXCEPTION);
 226                         }
 227                 }
 228                 else {
 229                         /*
 230                          * is NaN; signaling or quiet?
 231                          */
 232                         if (Dbl_isone_signaling(opnd2p1)) {
 233                                 /* trap if INVALIDTRAP enabled */
 234                                 if (Is_invalidtrap_enabled())
 235                                         return(OPC_2E_INVALIDEXCEPTION);
 236                                 /* make NaN quiet */
 237                                 Set_invalidflag();
 238                                 Dbl_set_quiet(opnd2p1);
 239                         }
 240                         /*
 241                          * is third operand a signaling NaN?
 242                          */
 243                         else if (Dbl_is_signalingnan(opnd3p1)) {
 244                                 /* trap if INVALIDTRAP enabled */
 245                                 if (Is_invalidtrap_enabled())
 246                                                 return(OPC_2E_INVALIDEXCEPTION);
 247                                 /* make NaN quiet */
 248                                 Set_invalidflag();
 249                                 Dbl_set_quiet(opnd3p1);
 250                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 251                                 return(NOEXCEPTION);
 252                         }
 253                         /*
 254                          * return quiet NaN
 255                          */
 256                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 257                         return(NOEXCEPTION);
 258                 }
 259         }
 260
 261         /*
 262          * check third operand for NaN's or infinity
 263          */
 264         if (Dbl_isinfinity_exponent(opnd3p1)) {
 265                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
 266                         /* return infinity */
 267                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 268                         return(NOEXCEPTION);
 269                 } else {
 270                         /*
 271                          * is NaN; signaling or quiet?
 272                          */
 273                         if (Dbl_isone_signaling(opnd3p1)) {
 274                                 /* trap if INVALIDTRAP enabled */
 275                                 if (Is_invalidtrap_enabled())
 276                                         return(OPC_2E_INVALIDEXCEPTION);
 277                                 /* make NaN quiet */
 278                                 Set_invalidflag();
 279                                 Dbl_set_quiet(opnd3p1);
 280                         }
 281                         /*
 282                          * return quiet NaN
 283                          */
 284                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 285                         return(NOEXCEPTION);
 286                 }
 287         }
 288
 289         /*
 290          * Generate multiply mantissa
 291          */
 292         if (Dbl_isnotzero_exponent(opnd1p1)) {
 293                 /* set hidden bit */
 294                 Dbl_clear_signexponent_set_hidden(opnd1p1);
 295         }
 296         else {
 297                 /* check for zero */
 298                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
 299                         /*
 300                          * Perform the add opnd3 with zero here.
 301                          */
 302                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 303                                 if (Is_rounding_mode(ROUNDMINUS)) {
 304                                         Dbl_or_signs(opnd3p1,resultp1);
 305                                 } else {
 306                                         Dbl_and_signs(opnd3p1,resultp1);
 307                                 }
 308                         }
 309                         /*
 310                          * Now let's check for trapped underflow case.
 311                          */
 312                         else if (Dbl_iszero_exponent(opnd3p1) &&
 313                                  Is_underflowtrap_enabled()) {
 314                                 /* need to normalize results mantissa */
 315                                 sign_save = Dbl_signextendedsign(opnd3p1);
 316                                 result_exponent = 0;
 317                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
 318                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
 319                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
 320                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
 321                                                         unfl);
 322                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 323                                 /* inexact = FALSE */
 324                                 return(OPC_2E_UNDERFLOWEXCEPTION);
 325                         }
 326                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 327                         return(NOEXCEPTION);
 328                 }
 329                 /* is denormalized, adjust exponent */
 330                 Dbl_clear_signexponent(opnd1p1);
 331                 Dbl_leftshiftby1(opnd1p1,opnd1p2);
 332                 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
 333         }
 334         /* opnd2 needs to have hidden bit set with msb in hidden bit */
 335         if (Dbl_isnotzero_exponent(opnd2p1)) {
 336                 Dbl_clear_signexponent_set_hidden(opnd2p1);
 337         }
 338         else {
 339                 /* check for zero */
 340                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 341                         /*
 342                          * Perform the add opnd3 with zero here.
 343                          */
 344                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 345                                 if (Is_rounding_mode(ROUNDMINUS)) {
 346                                         Dbl_or_signs(opnd3p1,resultp1);
 347                                 } else {
 348                                         Dbl_and_signs(opnd3p1,resultp1);
 349                                 }
 350                         }
 351                         /*
 352                          * Now let's check for trapped underflow case.
 353                          */
 354                         else if (Dbl_iszero_exponent(opnd3p1) &&
 355                             Is_underflowtrap_enabled()) {
 356                                 /* need to normalize results mantissa */
 357                                 sign_save = Dbl_signextendedsign(opnd3p1);
 358                                 result_exponent = 0;
 359                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
 360                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
 361                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
 362                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
 363                                                         unfl);
 364                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 365                                 /* inexact = FALSE */
 366                                 return(OPC_2E_UNDERFLOWEXCEPTION);
 367                         }
 368                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 369                         return(NOEXCEPTION);
 370                 }
 371                 /* is denormalized; want to normalize */
 372                 Dbl_clear_signexponent(opnd2p1);
 373                 Dbl_leftshiftby1(opnd2p1,opnd2p2);
 374                 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
 375         }
 376
 377         /* Multiply the first two source mantissas together */
 378
 379         /*
 380          * The intermediate result will be kept in tmpres,
 381          * which needs enough room for 106 bits of mantissa,
 382          * so let's call it a Double extended.
 383          */
 384         Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
 385
 386         /*
 387          * Four bits at a time are inspected in each loop, and a
 388          * simple shift and add multiply algorithm is used.
 389          */
 390         for (count = DBL_P-1; count >= 0; count -= 4) {
 391                 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
 392                 if (Dbit28p2(opnd1p2)) {
 393                         /* Fourword_add should be an ADD followed by 3 ADDC's */
 394                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 395                          opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
 396                 }
 397                 if (Dbit29p2(opnd1p2)) {
 398                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 399                          opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
 400                 }
 401                 if (Dbit30p2(opnd1p2)) {
 402                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 403                          opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
 404                 }
 405                 if (Dbit31p2(opnd1p2)) {
 406                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 407                          opnd2p1, opnd2p2, 0, 0);
 408                 }
 409                 Dbl_rightshiftby4(opnd1p1,opnd1p2);
 410         }
 411         if (Is_dexthiddenoverflow(tmpresp1)) {
 412                 /* result mantissa >= 2 (mantissa overflow) */
 413                 mpy_exponent++;
 414                 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
 415         }
 416
 417         /*
 418          * Restore the sign of the mpy result which was saved in resultp1.
 419          * The exponent will continue to be kept in mpy_exponent.
 420          */
 421         Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
 422
 423         /*
 424          * No rounding is required, since the result of the multiply
 425          * is exact in the extended format.
 426          */
 427
 428         /*
 429          * Now we are ready to perform the add portion of the operation.
 430          *
 431          * The exponents need to be kept as integers for now, since the
 432          * multiply result might not fit into the exponent field.  We
 433          * can't overflow or underflow because of this yet, since the
 434          * add could bring the final result back into range.
 435          */
 436         add_exponent = Dbl_exponent(opnd3p1);
 437
 438         /*
 439          * Check for denormalized or zero add operand.
 440          */
 441         if (add_exponent == 0) {
 442                 /* check for zero */
 443                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
 444                         /* right is zero */
 445                         /* Left can't be zero and must be result.
 446                          *
 447                          * The final result is now in tmpres and mpy_exponent,
 448                          * and needs to be rounded and squeezed back into
 449                          * double precision format from double extended.
 450                          */
 451                         result_exponent = mpy_exponent;
 452                         Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
 453                                 resultp1,resultp2,resultp3,resultp4);
 454                         sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
 455                         goto round;
 456                 }
 457
 458                 /*
 459                  * Neither are zeroes.
 460                  * Adjust exponent and normalize add operand.
 461                  */
 462                 sign_save = Dbl_signextendedsign(opnd3p1);      /* save sign */
 463                 Dbl_clear_signexponent(opnd3p1);
 464                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
 465                 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
 466                 Dbl_set_sign(opnd3p1,sign_save);        /* restore sign */
 467         } else {
 468                 Dbl_clear_exponent_set_hidden(opnd3p1);
 469         }
 470         /*
 471          * Copy opnd3 to the double extended variable called right.
 472          */
 473         Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
 474
 475         /*
 476          * A zero "save" helps discover equal operands (for later),
 477          * and is used in swapping operands (if needed).
 478          */
 479         Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
 480
 481         /*
 482          * Compare magnitude of operands.
 483          */
 484         Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
 485         Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
 486         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
 487             Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
 488                 /*
 489                  * Set the left operand to the larger one by XOR swap.
 490                  * First finish the first word "save".
 491                  */
 492                 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
 493                 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
 494                 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
 495                         rightp2,rightp3,rightp4);
 496                 /* also setup exponents used in rest of routine */
 497                 diff_exponent = add_exponent - mpy_exponent;
 498                 result_exponent = add_exponent;
 499         } else {
 500                 /* also setup exponents used in rest of routine */
 501                 diff_exponent = mpy_exponent - add_exponent;
 502                 result_exponent = mpy_exponent;
 503         }
 504         /* Invariant: left is not smaller than right. */
 505
 506         /*
 507          * Special case alignment of operands that would force alignment
 508          * beyond the extent of the extension.  A further optimization
 509          * could special case this but only reduces the path length for
 510          * this infrequent case.
 511          */
 512         if (diff_exponent > DBLEXT_THRESHOLD) {
 513                 diff_exponent = DBLEXT_THRESHOLD;
 514         }
 515
 516         /* Align right operand by shifting it to the right */
 517         Dblext_clear_sign(rightp1);
 518         Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
 519                 /*shifted by*/diff_exponent);
 520
 521         /* Treat sum and difference of the operands separately. */
 522         if ((int)save < 0) {
 523                 /*
 524                  * Difference of the two operands.  Overflow can occur if the
 525                  * multiply overflowed.  A borrow can occur out of the hidden
 526                  * bit and force a post normalization phase.
 527                  */
 528                 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
 529                         rightp1,rightp2,rightp3,rightp4,
 530                         resultp1,resultp2,resultp3,resultp4);
 531                 sign_save = Dbl_signextendedsign(resultp1);
 532                 if (Dbl_iszero_hidden(resultp1)) {
 533                         /* Handle normalization */
 534                 /* A straightforward algorithm would now shift the
 535                  * result and extension left until the hidden bit
 536                  * becomes one.  Not all of the extension bits need
 537                  * participate in the shift.  Only the two most
 538                  * significant bits (round and guard) are needed.
 539                  * If only a single shift is needed then the guard
 540                  * bit becomes a significant low order bit and the
 541                  * extension must participate in the rounding.
 542                  * If more than a single shift is needed, then all
 543                  * bits to the right of the guard bit are zeros,
 544                  * and the guard bit may or may not be zero. */
 545                         Dblext_leftshiftby1(resultp1,resultp2,resultp3,
 546                                 resultp4);
 547
 548                         /* Need to check for a zero result.  The sign and
 549                          * exponent fields have already been zeroed.  The more
 550                          * efficient test of the full object can be used.
 551                          */
 552                          if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
 553                                 /* Must have been "x-x" or "x+(-x)". */
 554                                 if (Is_rounding_mode(ROUNDMINUS))
 555                                         Dbl_setone_sign(resultp1);
 556                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 557                                 return(NOEXCEPTION);
 558                         }
 559                         result_exponent--;
 560
 561                         /* Look to see if normalization is finished. */
 562                         if (Dbl_isone_hidden(resultp1)) {
 563                                 /* No further normalization is needed */
 564                                 goto round;
 565                         }
 566
 567                         /* Discover first one bit to determine shift amount.
 568                          * Use a modified binary search.  We have already
 569                          * shifted the result one position left and still
 570                          * not found a one so the remainder of the extension
 571                          * must be zero and simplifies rounding. */
 572                         /* Scan bytes */
 573                         while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
 574                                 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
 575                                 result_exponent -= 8;
 576                         }
 577                         /* Now narrow it down to the nibble */
 578                         if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
 579                                 /* The lower nibble contains the
 580                                  * normalizing one */
 581                                 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
 582                                 result_exponent -= 4;
 583                         }
 584                         /* Select case where first bit is set (already
 585                          * normalized) otherwise select the proper shift. */
 586                         jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
 587                         if (jumpsize <= 7) switch(jumpsize) {
 588                         case 1:
 589                                 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
 590                                         resultp4);
 591                                 result_exponent -= 3;
 592                                 break;
 593                         case 2:
 594                         case 3:
 595                                 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
 596                                         resultp4);
 597                                 result_exponent -= 2;
 598                                 break;
 599                         case 4:
 600                         case 5:
 601                         case 6:
 602                         case 7:
 603                                 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
 604                                         resultp4);
 605                                 result_exponent -= 1;
 606                                 break;
 607                         }
 608                 } /* end if (hidden...)... */
 609         /* Fall through and round */
 610         } /* end if (save < 0)... */
 611         else {
 612                 /* Add magnitudes */
 613                 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
 614                         rightp1,rightp2,rightp3,rightp4,
 615                         /*to*/resultp1,resultp2,resultp3,resultp4);
 616                 sign_save = Dbl_signextendedsign(resultp1);
 617                 if (Dbl_isone_hiddenoverflow(resultp1)) {
 618                         /* Prenormalization required. */
 619                         Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
 620                                 resultp4);
 621                         result_exponent++;
 622                 } /* end if hiddenoverflow... */
 623         } /* end else ...add magnitudes... */
 624
 625         /* Round the result.  If the extension and lower two words are
 626          * all zeros, then the result is exact.  Otherwise round in the
 627          * correct direction.  Underflow is possible. If a postnormalization
 628          * is necessary, then the mantissa is all zeros so no shift is needed.
 629          */
 630   round:
 631         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
 632                 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
 633                         result_exponent,is_tiny);
 634         }
 635         Dbl_set_sign(resultp1,/*using*/sign_save);
 636         if (Dblext_isnotzero_mantissap3(resultp3) ||
 637             Dblext_isnotzero_mantissap4(resultp4)) {
 638                 inexact = TRUE;
 639                 switch(Rounding_mode()) {
 640                 case ROUNDNEAREST: /* The default. */
 641                         if (Dblext_isone_highp3(resultp3)) {
 642                                 /* at least 1/2 ulp */
 643                                 if (Dblext_isnotzero_low31p3(resultp3) ||
 644                                     Dblext_isnotzero_mantissap4(resultp4) ||
 645                                     Dblext_isone_lowp2(resultp2)) {
 646                                         /* either exactly half way and odd or
 647                                          * more than 1/2ulp */
 648                                         Dbl_increment(resultp1,resultp2);
 649                                 }
 650                         }
 651                         break;
 652
 653                 case ROUNDPLUS:
 654                         if (Dbl_iszero_sign(resultp1)) {
 655                                 /* Round up positive results */
 656                                 Dbl_increment(resultp1,resultp2);
 657                         }
 658                         break;
 659
 660                 case ROUNDMINUS:
 661                         if (Dbl_isone_sign(resultp1)) {
 662                                 /* Round down negative results */
 663                                 Dbl_increment(resultp1,resultp2);
 664                         }
 665
 666                 case ROUNDZERO:;
 667                         /* truncate is simple */
 668                 } /* end switch... */
 669                 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
 670         }
 671         if (result_exponent >= DBL_INFINITY_EXPONENT) {
 672                 /* trap if OVERFLOWTRAP enabled */
 673                 if (Is_overflowtrap_enabled()) {
 674                         /*
 675                          * Adjust bias of result
 676                          */
 677                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
 678                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 679                         if (inexact)
 680                             if (Is_inexacttrap_enabled())
 681                                 return (OPC_2E_OVERFLOWEXCEPTION |
 682                                         OPC_2E_INEXACTEXCEPTION);
 683                             else Set_inexactflag();
 684                         return (OPC_2E_OVERFLOWEXCEPTION);
 685                 }
 686                 inexact = TRUE;
 687                 Set_overflowflag();
 688                 /* set result to infinity or largest number */
 689                 Dbl_setoverflow(resultp1,resultp2);
 690
 691         } else if (result_exponent <= 0) {      /* underflow case */
 692                 if (Is_underflowtrap_enabled()) {
 693                         /*
 694                          * Adjust bias of result
 695                          */
 696                         Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
 697                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 698                         if (inexact)
 699                             if (Is_inexacttrap_enabled())
 700                                 return (OPC_2E_UNDERFLOWEXCEPTION |
 701                                         OPC_2E_INEXACTEXCEPTION);
 702                             else Set_inexactflag();
 703                         return(OPC_2E_UNDERFLOWEXCEPTION);
 704                 }
 705                 else if (inexact && is_tiny) Set_underflowflag();
 706         }
 707         else Dbl_set_exponent(resultp1,result_exponent);
 708         Dbl_copytoptr(resultp1,resultp2,dstptr);
 709         if (inexact)
 710                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
 711                 else Set_inexactflag();
 712         return(NOEXCEPTION);
 713 }
 714
 715 /*
 716  *  Double Floating-point Multiply Negate Fused Add
 717  */
 718
 719 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
 720
 721 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
 722 unsigned int *status;
 723 {
 724         unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
 725         register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
 726         unsigned int rightp1, rightp2, rightp3, rightp4;
 727         unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
 728         register int mpy_exponent, add_exponent, count;
 729         boolean inexact = FALSE, is_tiny = FALSE;
 730
 731         unsigned int signlessleft1, signlessright1, save;
 732         register int result_exponent, diff_exponent;
 733         int sign_save, jumpsize;
 734
 735         Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
 736         Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
 737         Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
 738
 739         /*
 740          * set sign bit of result of multiply
 741          */
 742         if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
 743                 Dbl_setzerop1(resultp1);
 744         else
 745                 Dbl_setnegativezerop1(resultp1);
 746
 747         /*
 748          * Generate multiply exponent
 749          */
 750         mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
 751
 752         /*
 753          * check first operand for NaN's or infinity
 754          */
 755         if (Dbl_isinfinity_exponent(opnd1p1)) {
 756                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
 757                         if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
 758                             Dbl_isnotnan(opnd3p1,opnd3p2)) {
 759                                 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
 760                                         /*
 761                                          * invalid since operands are infinity
 762                                          * and zero
 763                                          */
 764                                         if (Is_invalidtrap_enabled())
 765                                                 return(OPC_2E_INVALIDEXCEPTION);
 766                                         Set_invalidflag();
 767                                         Dbl_makequietnan(resultp1,resultp2);
 768                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 769                                         return(NOEXCEPTION);
 770                                 }
 771                                 /*
 772                                  * Check third operand for infinity with a
 773                                  *  sign opposite of the multiply result
 774                                  */
 775                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 776                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 777                                         /*
 778                                          * invalid since attempting a magnitude
 779                                          * subtraction of infinities
 780                                          */
 781                                         if (Is_invalidtrap_enabled())
 782                                                 return(OPC_2E_INVALIDEXCEPTION);
 783                                         Set_invalidflag();
 784                                         Dbl_makequietnan(resultp1,resultp2);
 785                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 786                                         return(NOEXCEPTION);
 787                                 }
 788
 789                                 /*
 790                                  * return infinity
 791                                  */
 792                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 793                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 794                                 return(NOEXCEPTION);
 795                         }
 796                 }
 797                 else {
 798                         /*
 799                          * is NaN; signaling or quiet?
 800                          */
 801                         if (Dbl_isone_signaling(opnd1p1)) {
 802                                 /* trap if INVALIDTRAP enabled */
 803                                 if (Is_invalidtrap_enabled())
 804                                         return(OPC_2E_INVALIDEXCEPTION);
 805                                 /* make NaN quiet */
 806                                 Set_invalidflag();
 807                                 Dbl_set_quiet(opnd1p1);
 808                         }
 809                         /*
 810                          * is second operand a signaling NaN?
 811                          */
 812                         else if (Dbl_is_signalingnan(opnd2p1)) {
 813                                 /* trap if INVALIDTRAP enabled */
 814                                 if (Is_invalidtrap_enabled())
 815                                         return(OPC_2E_INVALIDEXCEPTION);
 816                                 /* make NaN quiet */
 817                                 Set_invalidflag();
 818                                 Dbl_set_quiet(opnd2p1);
 819                                 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 820                                 return(NOEXCEPTION);
 821                         }
 822                         /*
 823                          * is third operand a signaling NaN?
 824                          */
 825                         else if (Dbl_is_signalingnan(opnd3p1)) {
 826                                 /* trap if INVALIDTRAP enabled */
 827                                 if (Is_invalidtrap_enabled())
 828                                         return(OPC_2E_INVALIDEXCEPTION);
 829                                 /* make NaN quiet */
 830                                 Set_invalidflag();
 831                                 Dbl_set_quiet(opnd3p1);
 832                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 833                                 return(NOEXCEPTION);
 834                         }
 835                         /*
 836                          * return quiet NaN
 837                          */
 838                         Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
 839                         return(NOEXCEPTION);
 840                 }
 841         }
 842
 843         /*
 844          * check second operand for NaN's or infinity
 845          */
 846         if (Dbl_isinfinity_exponent(opnd2p1)) {
 847                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 848                         if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
 849                                 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
 850                                         /*
 851                                          * invalid since multiply operands are
 852                                          * zero & infinity
 853                                          */
 854                                         if (Is_invalidtrap_enabled())
 855                                                 return(OPC_2E_INVALIDEXCEPTION);
 856                                         Set_invalidflag();
 857                                         Dbl_makequietnan(opnd2p1,opnd2p2);
 858                                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 859                                         return(NOEXCEPTION);
 860                                 }
 861
 862                                 /*
 863                                  * Check third operand for infinity with a
 864                                  *  sign opposite of the multiply result
 865                                  */
 866                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 867                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 868                                         /*
 869                                          * invalid since attempting a magnitude
 870                                          * subtraction of infinities
 871                                          */
 872                                         if (Is_invalidtrap_enabled())
 873                                                 return(OPC_2E_INVALIDEXCEPTION);
 874                                         Set_invalidflag();
 875                                         Dbl_makequietnan(resultp1,resultp2);
 876                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
 877                                         return(NOEXCEPTION);
 878                                 }
 879
 880                                 /*
 881                                  * return infinity
 882                                  */
 883                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 884                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 885                                 return(NOEXCEPTION);
 886                         }
 887                 }
 888                 else {
 889                         /*
 890                          * is NaN; signaling or quiet?
 891                          */
 892                         if (Dbl_isone_signaling(opnd2p1)) {
 893                                 /* trap if INVALIDTRAP enabled */
 894                                 if (Is_invalidtrap_enabled())
 895                                         return(OPC_2E_INVALIDEXCEPTION);
 896                                 /* make NaN quiet */
 897                                 Set_invalidflag();
 898                                 Dbl_set_quiet(opnd2p1);
 899                         }
 900                         /*
 901                          * is third operand a signaling NaN?
 902                          */
 903                         else if (Dbl_is_signalingnan(opnd3p1)) {
 904                                 /* trap if INVALIDTRAP enabled */
 905                                 if (Is_invalidtrap_enabled())
 906                                                 return(OPC_2E_INVALIDEXCEPTION);
 907                                 /* make NaN quiet */
 908                                 Set_invalidflag();
 909                                 Dbl_set_quiet(opnd3p1);
 910                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 911                                 return(NOEXCEPTION);
 912                         }
 913                         /*
 914                          * return quiet NaN
 915                          */
 916                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 917                         return(NOEXCEPTION);
 918                 }
 919         }
 920
 921         /*
 922          * check third operand for NaN's or infinity
 923          */
 924         if (Dbl_isinfinity_exponent(opnd3p1)) {
 925                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
 926                         /* return infinity */
 927                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 928                         return(NOEXCEPTION);
 929                 } else {
 930                         /*
 931                          * is NaN; signaling or quiet?
 932                          */
 933                         if (Dbl_isone_signaling(opnd3p1)) {
 934                                 /* trap if INVALIDTRAP enabled */
 935                                 if (Is_invalidtrap_enabled())
 936                                         return(OPC_2E_INVALIDEXCEPTION);
 937                                 /* make NaN quiet */
 938                                 Set_invalidflag();
 939                                 Dbl_set_quiet(opnd3p1);
 940                         }
 941                         /*
 942                          * return quiet NaN
 943                          */
 944                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 945                         return(NOEXCEPTION);
 946                 }
 947         }
 948
 949         /*
 950          * Generate multiply mantissa
 951          */
 952         if (Dbl_isnotzero_exponent(opnd1p1)) {
 953                 /* set hidden bit */
 954                 Dbl_clear_signexponent_set_hidden(opnd1p1);
 955         }
 956         else {
 957                 /* check for zero */
 958                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
 959                         /*
 960                          * Perform the add opnd3 with zero here.
 961                          */
 962                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 963                                 if (Is_rounding_mode(ROUNDMINUS)) {
 964                                         Dbl_or_signs(opnd3p1,resultp1);
 965                                 } else {
 966                                         Dbl_and_signs(opnd3p1,resultp1);
 967                                 }
 968                         }
 969                         /*
 970                          * Now let's check for trapped underflow case.
 971                          */
 972                         else if (Dbl_iszero_exponent(opnd3p1) &&
 973                                  Is_underflowtrap_enabled()) {
 974                                 /* need to normalize results mantissa */
 975                                 sign_save = Dbl_signextendedsign(opnd3p1);
 976                                 result_exponent = 0;
 977                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
 978                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
 979                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
 980                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
 981                                                         unfl);
 982                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 983                                 /* inexact = FALSE */
 984                                 return(OPC_2E_UNDERFLOWEXCEPTION);
 985                         }
 986                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 987                         return(NOEXCEPTION);
 988                 }
 989                 /* is denormalized, adjust exponent */
 990                 Dbl_clear_signexponent(opnd1p1);
 991                 Dbl_leftshiftby1(opnd1p1,opnd1p2);
 992                 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
 993         }
 994         /* opnd2 needs to have hidden bit set with msb in hidden bit */
 995         if (Dbl_isnotzero_exponent(opnd2p1)) {
 996                 Dbl_clear_signexponent_set_hidden(opnd2p1);
 997         }
 998         else {
 999                 /* check for zero */
1000                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1001                         /*
1002                          * Perform the add opnd3 with zero here.
1003                          */
1004                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005                                 if (Is_rounding_mode(ROUNDMINUS)) {
1006                                         Dbl_or_signs(opnd3p1,resultp1);
1007                                 } else {
1008                                         Dbl_and_signs(opnd3p1,resultp1);
1009                                 }
1010                         }
1011                         /*
1012                          * Now let's check for trapped underflow case.
1013                          */
1014                         else if (Dbl_iszero_exponent(opnd3p1) &&
1015                             Is_underflowtrap_enabled()) {
1016                                 /* need to normalize results mantissa */
1017                                 sign_save = Dbl_signextendedsign(opnd3p1);
1018                                 result_exponent = 0;
1019                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023                                                         unfl);
1024                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025                                 /* inexact = FALSE */
1026                                 return(OPC_2E_UNDERFLOWEXCEPTION);
1027                         }
1028                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029                         return(NOEXCEPTION);
1030                 }
1031                 /* is denormalized; want to normalize */
1032                 Dbl_clear_signexponent(opnd2p1);
1033                 Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034                 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1035         }
1036
1037         /* Multiply the first two source mantissas together */
1038
1039         /*
1040          * The intermediate result will be kept in tmpres,
1041          * which needs enough room for 106 bits of mantissa,
1042          * so lets call it a Double extended.
1043          */
1044         Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1045
1046         /*
1047          * Four bits at a time are inspected in each loop, and a
1048          * simple shift and add multiply algorithm is used.
1049          */
1050         for (count = DBL_P-1; count >= 0; count -= 4) {
1051                 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052                 if (Dbit28p2(opnd1p2)) {
1053                         /* Fourword_add should be an ADD followed by 3 ADDC's */
1054                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1055                          opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1056                 }
1057                 if (Dbit29p2(opnd1p2)) {
1058                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059                          opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1060                 }
1061                 if (Dbit30p2(opnd1p2)) {
1062                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063                          opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1064                 }
1065                 if (Dbit31p2(opnd1p2)) {
1066                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067                          opnd2p1, opnd2p2, 0, 0);
1068                 }
1069                 Dbl_rightshiftby4(opnd1p1,opnd1p2);
1070         }
1071         if (Is_dexthiddenoverflow(tmpresp1)) {
1072                 /* result mantissa >= 2 (mantissa overflow) */
1073                 mpy_exponent++;
1074                 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1075         }
1076
1077         /*
1078          * Restore the sign of the mpy result which was saved in resultp1.
1079          * The exponent will continue to be kept in mpy_exponent.
1080          */
1081         Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1082
1083         /*
1084          * No rounding is required, since the result of the multiply
1085          * is exact in the extended format.
1086          */
1087
1088         /*
1089          * Now we are ready to perform the add portion of the operation.
1090          *
1091          * The exponents need to be kept as integers for now, since the
1092          * multiply result might not fit into the exponent field.  We
1093          * can't overflow or underflow because of this yet, since the
1094          * add could bring the final result back into range.
1095          */
1096         add_exponent = Dbl_exponent(opnd3p1);
1097
1098         /*
1099          * Check for denormalized or zero add operand.
1100          */
1101         if (add_exponent == 0) {
1102                 /* check for zero */
1103                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104                         /* right is zero */
1105                         /* Left can't be zero and must be result.
1106                          *
1107                          * The final result is now in tmpres and mpy_exponent,
1108                          * and needs to be rounded and squeezed back into
1109                          * double precision format from double extended.
1110                          */
1111                         result_exponent = mpy_exponent;
1112                         Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113                                 resultp1,resultp2,resultp3,resultp4);
1114                         sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115                         goto round;
1116                 }
1117
1118                 /*
1119                  * Neither are zeroes.
1120                  * Adjust exponent and normalize add operand.
1121                  */
1122                 sign_save = Dbl_signextendedsign(opnd3p1);      /* save sign */
1123                 Dbl_clear_signexponent(opnd3p1);
1124                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125                 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126                 Dbl_set_sign(opnd3p1,sign_save);        /* restore sign */
1127         } else {
1128                 Dbl_clear_exponent_set_hidden(opnd3p1);
1129         }
1130         /*
1131          * Copy opnd3 to the double extended variable called right.
1132          */
1133         Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1134
1135         /*
1136          * A zero "save" helps discover equal operands (for later),
1137          * and is used in swapping operands (if needed).
1138          */
1139         Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1140
1141         /*
1142          * Compare magnitude of operands.
1143          */
1144         Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145         Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147             Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1148                 /*
1149                  * Set the left operand to the larger one by XOR swap.
1150                  * First finish the first word "save".
1151                  */
1152                 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153                 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154                 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155                         rightp2,rightp3,rightp4);
1156                 /* also setup exponents used in rest of routine */
1157                 diff_exponent = add_exponent - mpy_exponent;
1158                 result_exponent = add_exponent;
1159         } else {
1160                 /* also setup exponents used in rest of routine */
1161                 diff_exponent = mpy_exponent - add_exponent;
1162                 result_exponent = mpy_exponent;
1163         }
1164         /* Invariant: left is not smaller than right. */
1165
1166         /*
1167          * Special case alignment of operands that would force alignment
1168          * beyond the extent of the extension.  A further optimization
1169          * could special case this but only reduces the path length for
1170          * this infrequent case.
1171          */
1172         if (diff_exponent > DBLEXT_THRESHOLD) {
1173                 diff_exponent = DBLEXT_THRESHOLD;
1174         }
1175
1176         /* Align right operand by shifting it to the right */
1177         Dblext_clear_sign(rightp1);
1178         Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179                 /*shifted by*/diff_exponent);
1180
1181         /* Treat sum and difference of the operands separately. */
1182         if ((int)save < 0) {
1183                 /*
1184                  * Difference of the two operands.  Overflow can occur if the
1185                  * multiply overflowed.  A borrow can occur out of the hidden
1186                  * bit and force a post normalization phase.
1187                  */
1188                 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189                         rightp1,rightp2,rightp3,rightp4,
1190                         resultp1,resultp2,resultp3,resultp4);
1191                 sign_save = Dbl_signextendedsign(resultp1);
1192                 if (Dbl_iszero_hidden(resultp1)) {
1193                         /* Handle normalization */
1194                 /* A straightforward algorithm would now shift the
1195                  * result and extension left until the hidden bit
1196                  * becomes one.  Not all of the extension bits need
1197                  * participate in the shift.  Only the two most
1198                  * significant bits (round and guard) are needed.
1199                  * If only a single shift is needed then the guard
1200                  * bit becomes a significant low order bit and the
1201                  * extension must participate in the rounding.
1202                  * If more than a single shift is needed, then all
1203                  * bits to the right of the guard bit are zeros,
1204                  * and the guard bit may or may not be zero. */
1205                         Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206                                 resultp4);
1207
1208                         /* Need to check for a zero result.  The sign and
1209                          * exponent fields have already been zeroed.  The more
1210                          * efficient test of the full object can be used.
1211                          */
1212                          if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213                                 /* Must have been "x-x" or "x+(-x)". */
1214                                 if (Is_rounding_mode(ROUNDMINUS))
1215                                         Dbl_setone_sign(resultp1);
1216                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
1217                                 return(NOEXCEPTION);
1218                         }
1219                         result_exponent--;
1220
1221                         /* Look to see if normalization is finished. */
1222                         if (Dbl_isone_hidden(resultp1)) {
1223                                 /* No further normalization is needed */
1224                                 goto round;
1225                         }
1226
1227                         /* Discover first one bit to determine shift amount.
1228                          * Use a modified binary search.  We have already
1229                          * shifted the result one position left and still
1230                          * not found a one so the remainder of the extension
1231                          * must be zero and simplifies rounding. */
1232                         /* Scan bytes */
1233                         while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234                                 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235                                 result_exponent -= 8;
1236                         }
1237                         /* Now narrow it down to the nibble */
1238                         if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239                                 /* The lower nibble contains the
1240                                  * normalizing one */
1241                                 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242                                 result_exponent -= 4;
1243                         }
1244                         /* Select case where first bit is set (already
1245                          * normalized) otherwise select the proper shift. */
1246                         jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247                         if (jumpsize <= 7) switch(jumpsize) {
1248                         case 1:
1249                                 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250                                         resultp4);
1251                                 result_exponent -= 3;
1252                                 break;
1253                         case 2:
1254                         case 3:
1255                                 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256                                         resultp4);
1257                                 result_exponent -= 2;
1258                                 break;
1259                         case 4:
1260                         case 5:
1261                         case 6:
1262                         case 7:
1263                                 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264                                         resultp4);
1265                                 result_exponent -= 1;
1266                                 break;
1267                         }
1268                 } /* end if (hidden...)... */
1269         /* Fall through and round */
1270         } /* end if (save < 0)... */
1271         else {
1272                 /* Add magnitudes */
1273                 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274                         rightp1,rightp2,rightp3,rightp4,
1275                         /*to*/resultp1,resultp2,resultp3,resultp4);
1276                 sign_save = Dbl_signextendedsign(resultp1);
1277                 if (Dbl_isone_hiddenoverflow(resultp1)) {
1278                         /* Prenormalization required. */
1279                         Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280                                 resultp4);
1281                         result_exponent++;
1282                 } /* end if hiddenoverflow... */
1283         } /* end else ...add magnitudes... */
1284
1285         /* Round the result.  If the extension and lower two words are
1286          * all zeros, then the result is exact.  Otherwise round in the
1287          * correct direction.  Underflow is possible. If a postnormalization
1288          * is necessary, then the mantissa is all zeros so no shift is needed.
1289          */
1290   round:
1291         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292                 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293                         result_exponent,is_tiny);
1294         }
1295         Dbl_set_sign(resultp1,/*using*/sign_save);
1296         if (Dblext_isnotzero_mantissap3(resultp3) ||
1297             Dblext_isnotzero_mantissap4(resultp4)) {
1298                 inexact = TRUE;
1299                 switch(Rounding_mode()) {
1300                 case ROUNDNEAREST: /* The default. */
1301                         if (Dblext_isone_highp3(resultp3)) {
1302                                 /* at least 1/2 ulp */
1303                                 if (Dblext_isnotzero_low31p3(resultp3) ||
1304                                     Dblext_isnotzero_mantissap4(resultp4) ||
1305                                     Dblext_isone_lowp2(resultp2)) {
1306                                         /* either exactly half way and odd or
1307                                          * more than 1/2ulp */
1308                                         Dbl_increment(resultp1,resultp2);
1309                                 }
1310                         }
1311                         break;
1312
1313                 case ROUNDPLUS:
1314                         if (Dbl_iszero_sign(resultp1)) {
1315                                 /* Round up positive results */
1316                                 Dbl_increment(resultp1,resultp2);
1317                         }
1318                         break;
1319
1320                 case ROUNDMINUS:
1321                         if (Dbl_isone_sign(resultp1)) {
1322                                 /* Round down negative results */
1323                                 Dbl_increment(resultp1,resultp2);
1324                         }
1325
1326                 case ROUNDZERO:;
1327                         /* truncate is simple */
1328                 } /* end switch... */
1329                 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1330         }
1331         if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332                 /* Overflow */
1333                 if (Is_overflowtrap_enabled()) {
1334                         /*
1335                          * Adjust bias of result
1336                          */
1337                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338                         Dbl_copytoptr(resultp1,resultp2,dstptr);
1339                         if (inexact)
1340                             if (Is_inexacttrap_enabled())
1341                                 return (OPC_2E_OVERFLOWEXCEPTION |
1342                                         OPC_2E_INEXACTEXCEPTION);
1343                             else Set_inexactflag();
1344                         return (OPC_2E_OVERFLOWEXCEPTION);
1345                 }
1346                 inexact = TRUE;
1347                 Set_overflowflag();
1348                 Dbl_setoverflow(resultp1,resultp2);
1349         } else if (result_exponent <= 0) {      /* underflow case */
1350                 if (Is_underflowtrap_enabled()) {
1351                         /*
1352                          * Adjust bias of result
1353                          */
1354                         Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355                         Dbl_copytoptr(resultp1,resultp2,dstptr);
1356                         if (inexact)
1357                             if (Is_inexacttrap_enabled())
1358                                 return (OPC_2E_UNDERFLOWEXCEPTION |
1359                                         OPC_2E_INEXACTEXCEPTION);
1360                             else Set_inexactflag();
1361                         return(OPC_2E_UNDERFLOWEXCEPTION);
1362                 }
1363                 else if (inexact && is_tiny) Set_underflowflag();
1364         }
1365         else Dbl_set_exponent(resultp1,result_exponent);
1366         Dbl_copytoptr(resultp1,resultp2,dstptr);
1367         if (inexact)
1368                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369                 else Set_inexactflag();
1370         return(NOEXCEPTION);
1371 }
1372
1373 /*
1374  *  Single Floating-point Multiply Fused Add
1375  */
1376
1377 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1378
1379 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380 unsigned int *status;
1381 {
1382         unsigned int opnd1, opnd2, opnd3;
1383         register unsigned int tmpresp1, tmpresp2;
1384         unsigned int rightp1, rightp2;
1385         unsigned int resultp1, resultp2 = 0;
1386         register int mpy_exponent, add_exponent, count;
1387         boolean inexact = FALSE, is_tiny = FALSE;
1388
1389         unsigned int signlessleft1, signlessright1, save;
1390         register int result_exponent, diff_exponent;
1391         int sign_save, jumpsize;
1392
1393         Sgl_copyfromptr(src1ptr,opnd1);
1394         Sgl_copyfromptr(src2ptr,opnd2);
1395         Sgl_copyfromptr(src3ptr,opnd3);
1396
1397         /*
1398          * set sign bit of result of multiply
1399          */
1400         if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1401                 Sgl_setnegativezero(resultp1);
1402         else Sgl_setzero(resultp1);
1403
1404         /*
1405          * Generate multiply exponent
1406          */
1407         mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1408
1409         /*
1410          * check first operand for NaN's or infinity
1411          */
1412         if (Sgl_isinfinity_exponent(opnd1)) {
1413                 if (Sgl_iszero_mantissa(opnd1)) {
1414                         if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415                                 if (Sgl_iszero_exponentmantissa(opnd2)) {
1416                                         /*
1417                                          * invalid since operands are infinity
1418                                          * and zero
1419                                          */
1420                                         if (Is_invalidtrap_enabled())
1421                                                 return(OPC_2E_INVALIDEXCEPTION);
1422                                         Set_invalidflag();
1423                                         Sgl_makequietnan(resultp1);
1424                                         Sgl_copytoptr(resultp1,dstptr);
1425                                         return(NOEXCEPTION);
1426                                 }
1427                                 /*
1428                                  * Check third operand for infinity with a
1429                                  *  sign opposite of the multiply result
1430                                  */
1431                                 if (Sgl_isinfinity(opnd3) &&
1432                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1433                                         /*
1434                                          * invalid since attempting a magnitude
1435                                          * subtraction of infinities
1436                                          */
1437                                         if (Is_invalidtrap_enabled())
1438                                                 return(OPC_2E_INVALIDEXCEPTION);
1439                                         Set_invalidflag();
1440                                         Sgl_makequietnan(resultp1);
1441                                         Sgl_copytoptr(resultp1,dstptr);
1442                                         return(NOEXCEPTION);
1443                                 }
1444
1445                                 /*
1446                                  * return infinity
1447                                  */
1448                                 Sgl_setinfinity_exponentmantissa(resultp1);
1449                                 Sgl_copytoptr(resultp1,dstptr);
1450                                 return(NOEXCEPTION);
1451                         }
1452                 }
1453                 else {
1454                         /*
1455                          * is NaN; signaling or quiet?
1456                          */
1457                         if (Sgl_isone_signaling(opnd1)) {
1458                                 /* trap if INVALIDTRAP enabled */
1459                                 if (Is_invalidtrap_enabled())
1460                                         return(OPC_2E_INVALIDEXCEPTION);
1461                                 /* make NaN quiet */
1462                                 Set_invalidflag();
1463                                 Sgl_set_quiet(opnd1);
1464                         }
1465                         /*
1466                          * is second operand a signaling NaN?
1467                          */
1468                         else if (Sgl_is_signalingnan(opnd2)) {
1469                                 /* trap if INVALIDTRAP enabled */
1470                                 if (Is_invalidtrap_enabled())
1471                                         return(OPC_2E_INVALIDEXCEPTION);
1472                                 /* make NaN quiet */
1473                                 Set_invalidflag();
1474                                 Sgl_set_quiet(opnd2);
1475                                 Sgl_copytoptr(opnd2,dstptr);
1476                                 return(NOEXCEPTION);
1477                         }
1478                         /*
1479                          * is third operand a signaling NaN?
1480                          */
1481                         else if (Sgl_is_signalingnan(opnd3)) {
1482                                 /* trap if INVALIDTRAP enabled */
1483                                 if (Is_invalidtrap_enabled())
1484                                         return(OPC_2E_INVALIDEXCEPTION);
1485                                 /* make NaN quiet */
1486                                 Set_invalidflag();
1487                                 Sgl_set_quiet(opnd3);
1488                                 Sgl_copytoptr(opnd3,dstptr);
1489                                 return(NOEXCEPTION);
1490                         }
1491                         /*
1492                          * return quiet NaN
1493                          */
1494                         Sgl_copytoptr(opnd1,dstptr);
1495                         return(NOEXCEPTION);
1496                 }
1497         }
1498
1499         /*
1500          * check second operand for NaN's or infinity
1501          */
1502         if (Sgl_isinfinity_exponent(opnd2)) {
1503                 if (Sgl_iszero_mantissa(opnd2)) {
1504                         if (Sgl_isnotnan(opnd3)) {
1505                                 if (Sgl_iszero_exponentmantissa(opnd1)) {
1506                                         /*
1507                                          * invalid since multiply operands are
1508                                          * zero & infinity
1509                                          */
1510                                         if (Is_invalidtrap_enabled())
1511                                                 return(OPC_2E_INVALIDEXCEPTION);
1512                                         Set_invalidflag();
1513                                         Sgl_makequietnan(opnd2);
1514                                         Sgl_copytoptr(opnd2,dstptr);
1515                                         return(NOEXCEPTION);
1516                                 }
1517
1518                                 /*
1519                                  * Check third operand for infinity with a
1520                                  *  sign opposite of the multiply result
1521                                  */
1522                                 if (Sgl_isinfinity(opnd3) &&
1523                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1524                                         /*
1525                                          * invalid since attempting a magnitude
1526                                          * subtraction of infinities
1527                                          */
1528                                         if (Is_invalidtrap_enabled())
1529                                                 return(OPC_2E_INVALIDEXCEPTION);
1530                                         Set_invalidflag();
1531                                         Sgl_makequietnan(resultp1);
1532                                         Sgl_copytoptr(resultp1,dstptr);
1533                                         return(NOEXCEPTION);
1534                                 }
1535
1536                                 /*
1537                                  * return infinity
1538                                  */
1539                                 Sgl_setinfinity_exponentmantissa(resultp1);
1540                                 Sgl_copytoptr(resultp1,dstptr);
1541                                 return(NOEXCEPTION);
1542                         }
1543                 }
1544                 else {
1545                         /*
1546                          * is NaN; signaling or quiet?
1547                          */
1548                         if (Sgl_isone_signaling(opnd2)) {
1549                                 /* trap if INVALIDTRAP enabled */
1550                                 if (Is_invalidtrap_enabled())
1551                                         return(OPC_2E_INVALIDEXCEPTION);
1552                                 /* make NaN quiet */
1553                                 Set_invalidflag();
1554                                 Sgl_set_quiet(opnd2);
1555                         }
1556                         /*
1557                          * is third operand a signaling NaN?
1558                          */
1559                         else if (Sgl_is_signalingnan(opnd3)) {
1560                                 /* trap if INVALIDTRAP enabled */
1561                                 if (Is_invalidtrap_enabled())
1562                                                 return(OPC_2E_INVALIDEXCEPTION);
1563                                 /* make NaN quiet */
1564                                 Set_invalidflag();
1565                                 Sgl_set_quiet(opnd3);
1566                                 Sgl_copytoptr(opnd3,dstptr);
1567                                 return(NOEXCEPTION);
1568                         }
1569                         /*
1570                          * return quiet NaN
1571                          */
1572                         Sgl_copytoptr(opnd2,dstptr);
1573                         return(NOEXCEPTION);
1574                 }
1575         }
1576
1577         /*
1578          * check third operand for NaN's or infinity
1579          */
1580         if (Sgl_isinfinity_exponent(opnd3)) {
1581                 if (Sgl_iszero_mantissa(opnd3)) {
1582                         /* return infinity */
1583                         Sgl_copytoptr(opnd3,dstptr);
1584                         return(NOEXCEPTION);
1585                 } else {
1586                         /*
1587                          * is NaN; signaling or quiet?
1588                          */
1589                         if (Sgl_isone_signaling(opnd3)) {
1590                                 /* trap if INVALIDTRAP enabled */
1591                                 if (Is_invalidtrap_enabled())
1592                                         return(OPC_2E_INVALIDEXCEPTION);
1593                                 /* make NaN quiet */
1594                                 Set_invalidflag();
1595                                 Sgl_set_quiet(opnd3);
1596                         }
1597                         /*
1598                          * return quiet NaN
1599                          */
1600                         Sgl_copytoptr(opnd3,dstptr);
1601                         return(NOEXCEPTION);
1602                 }
1603         }
1604
1605         /*
1606          * Generate multiply mantissa
1607          */
1608         if (Sgl_isnotzero_exponent(opnd1)) {
1609                 /* set hidden bit */
1610                 Sgl_clear_signexponent_set_hidden(opnd1);
1611         }
1612         else {
1613                 /* check for zero */
1614                 if (Sgl_iszero_mantissa(opnd1)) {
1615                         /*
1616                          * Perform the add opnd3 with zero here.
1617                          */
1618                         if (Sgl_iszero_exponentmantissa(opnd3)) {
1619                                 if (Is_rounding_mode(ROUNDMINUS)) {
1620                                         Sgl_or_signs(opnd3,resultp1);
1621                                 } else {
1622                                         Sgl_and_signs(opnd3,resultp1);
1623                                 }
1624                         }
1625                         /*
1626                          * Now let's check for trapped underflow case.
1627                          */
1628                         else if (Sgl_iszero_exponent(opnd3) &&
1629                                  Is_underflowtrap_enabled()) {
1630                                 /* need to normalize results mantissa */
1631                                 sign_save = Sgl_signextendedsign(opnd3);
1632                                 result_exponent = 0;
1633                                 Sgl_leftshiftby1(opnd3);
1634                                 Sgl_normalize(opnd3,result_exponent);
1635                                 Sgl_set_sign(opnd3,/*using*/sign_save);
1636                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
1637                                                         unfl);
1638                                 Sgl_copytoptr(opnd3,dstptr);
1639                                 /* inexact = FALSE */
1640                                 return(OPC_2E_UNDERFLOWEXCEPTION);
1641                         }
1642                         Sgl_copytoptr(opnd3,dstptr);
1643                         return(NOEXCEPTION);
1644                 }
1645                 /* is denormalized, adjust exponent */
1646                 Sgl_clear_signexponent(opnd1);
1647                 Sgl_leftshiftby1(opnd1);
1648                 Sgl_normalize(opnd1,mpy_exponent);
1649         }
1650         /* opnd2 needs to have hidden bit set with msb in hidden bit */
1651         if (Sgl_isnotzero_exponent(opnd2)) {
1652                 Sgl_clear_signexponent_set_hidden(opnd2);
1653         }
1654         else {
1655                 /* check for zero */
1656                 if (Sgl_iszero_mantissa(opnd2)) {
1657                         /*
1658                          * Perform the add opnd3 with zero here.
1659                          */
1660                         if (Sgl_iszero_exponentmantissa(opnd3)) {
1661                                 if (Is_rounding_mode(ROUNDMINUS)) {
1662                                         Sgl_or_signs(opnd3,resultp1);
1663                                 } else {
1664                                         Sgl_and_signs(opnd3,resultp1);
1665                                 }
1666                         }
1667                         /*
1668                          * Now let's check for trapped underflow case.
1669                          */
1670                         else if (Sgl_iszero_exponent(opnd3) &&
1671                             Is_underflowtrap_enabled()) {
1672                                 /* need to normalize results mantissa */
1673                                 sign_save = Sgl_signextendedsign(opnd3);
1674                                 result_exponent = 0;
1675                                 Sgl_leftshiftby1(opnd3);
1676                                 Sgl_normalize(opnd3,result_exponent);
1677                                 Sgl_set_sign(opnd3,/*using*/sign_save);
1678                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
1679                                                         unfl);
1680                                 Sgl_copytoptr(opnd3,dstptr);
1681                                 /* inexact = FALSE */
1682                                 return(OPC_2E_UNDERFLOWEXCEPTION);
1683                         }
1684                         Sgl_copytoptr(opnd3,dstptr);
1685                         return(NOEXCEPTION);
1686                 }
1687                 /* is denormalized; want to normalize */
1688                 Sgl_clear_signexponent(opnd2);
1689                 Sgl_leftshiftby1(opnd2);
1690                 Sgl_normalize(opnd2,mpy_exponent);
1691         }
1692
1693         /* Multiply the first two source mantissas together */
1694
1695         /*
1696          * The intermediate result will be kept in tmpres,
1697          * which needs enough room for the full 2*SGL_P-bit product
1698          * mantissa, so let's call it a Single extended.
1699          */
1700         Sglext_setzero(tmpresp1,tmpresp2);
1701
1702         /*
1703          * Four bits at a time are inspected in each loop, and a
1704          * simple shift and add multiply algorithm is used.
1705          */
1706         for (count = SGL_P-1; count >= 0; count -= 4) {
1707                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708                 if (Sbit28(opnd1)) {
1709                         /* Twoword_add should be an ADD followed by 2 ADDC's */
1710                         Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1711                 }
1712                 if (Sbit29(opnd1)) {
1713                         Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1714                 }
1715                 if (Sbit30(opnd1)) {
1716                         Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1717                 }
1718                 if (Sbit31(opnd1)) {
1719                         Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1720                 }
1721                 Sgl_rightshiftby4(opnd1);
1722         }
1723         if (Is_sexthiddenoverflow(tmpresp1)) {
1724                 /* result mantissa >= 2 (mantissa overflow) */
1725                 mpy_exponent++;
1726                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727         } else {
1728                 Sglext_rightshiftby3(tmpresp1,tmpresp2);
1729         }
1730
1731         /*
1732          * Restore the sign of the mpy result which was saved in resultp1.
1733          * The exponent will continue to be kept in mpy_exponent.
1734          */
1735         Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1736
1737         /*
1738          * No rounding is required, since the result of the multiply
1739          * is exact in the extended format.
1740          */
1741
1742         /*
1743          * Now we are ready to perform the add portion of the operation.
1744          *
1745          * The exponents need to be kept as integers for now, since the
1746          * multiply result might not fit into the exponent field.  We
1747          * can't overflow or underflow because of this yet, since the
1748          * add could bring the final result back into range.
1749          */
1750         add_exponent = Sgl_exponent(opnd3);
1751
1752         /*
1753          * Check for denormalized or zero add operand.
1754          */
1755         if (add_exponent == 0) {
1756                 /* check for zero */
1757                 if (Sgl_iszero_mantissa(opnd3)) {
1758                         /* right is zero */
1759                         /* Left can't be zero and must be result.
1760                          *
1761                          * The final result is now in tmpres and mpy_exponent,
1762                          * and needs to be rounded and squeezed back into
1763                          * single precision format from single extended.
1764                          */
1765                         result_exponent = mpy_exponent;
1766                         Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767                         sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768                         goto round;
1769                 }
1770
1771                 /*
1772                  * Neither are zeroes.
1773                  * Adjust exponent and normalize add operand.
1774                  */
1775                 sign_save = Sgl_signextendedsign(opnd3);        /* save sign */
1776                 Sgl_clear_signexponent(opnd3);
1777                 Sgl_leftshiftby1(opnd3);
1778                 Sgl_normalize(opnd3,add_exponent);
1779                 Sgl_set_sign(opnd3,sign_save);          /* restore sign */
1780         } else {
1781                 Sgl_clear_exponent_set_hidden(opnd3);
1782         }
1783         /*
1784          * Copy opnd3 to the single extended variable called right.
1785          */
1786         Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1787
1788         /*
1789          * A zero "save" helps discover equal operands (for later),
1790          * and is used in swapping operands (if needed).
1791          */
1792         Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1793
1794         /*
1795          * Compare magnitude of operands.
1796          */
1797         Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798         Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800             Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1801                 /*
1802                  * Set the left operand to the larger one by XOR swap.
1803                  * First finish the first word "save".
1804                  */
1805                 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806                 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807                 Sglext_swap_lower(tmpresp2,rightp2);
1808                 /* also setup exponents used in rest of routine */
1809                 diff_exponent = add_exponent - mpy_exponent;
1810                 result_exponent = add_exponent;
1811         } else {
1812                 /* also setup exponents used in rest of routine */
1813                 diff_exponent = mpy_exponent - add_exponent;
1814                 result_exponent = mpy_exponent;
1815         }
1816         /* Invariant: left is not smaller than right. */
1817
1818         /*
1819          * Special case alignment of operands that would force alignment
1820          * beyond the extent of the extension.  A further optimization
1821          * could special case this but only reduces the path length for
1822          * this infrequent case.
1823          */
1824         if (diff_exponent > SGLEXT_THRESHOLD) {
1825                 diff_exponent = SGLEXT_THRESHOLD;
1826         }
1827
1828         /* Align right operand by shifting it to the right */
1829         Sglext_clear_sign(rightp1);
1830         Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1831
1832         /* Treat sum and difference of the operands separately. */
1833         if ((int)save < 0) {
1834                 /*
1835                  * Difference of the two operands.  Overflow can occur if the
1836                  * multiply overflowed.  A borrow can occur out of the hidden
1837                  * bit and force a post normalization phase.
1838                  */
1839                 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840                         resultp1,resultp2);
1841                 sign_save = Sgl_signextendedsign(resultp1);
1842                 if (Sgl_iszero_hidden(resultp1)) {
1843                         /* Handle normalization */
1844                 /* A straightforward algorithm would now shift the
1845                  * result and extension left until the hidden bit
1846                  * becomes one.  Not all of the extension bits need
1847                  * participate in the shift.  Only the two most
1848                  * significant bits (round and guard) are needed.
1849                  * If only a single shift is needed then the guard
1850                  * bit becomes a significant low order bit and the
1851                  * extension must participate in the rounding.
1852                  * If more than a single shift is needed, then all
1853                  * bits to the right of the guard bit are zeros,
1854                  * and the guard bit may or may not be zero. */
1855                         Sglext_leftshiftby1(resultp1,resultp2);
1856
1857                         /* Need to check for a zero result.  The sign and
1858                          * exponent fields have already been zeroed.  The more
1859                          * efficient test of the full object can be used.
1860                          */
1861                          if (Sglext_iszero(resultp1,resultp2)) {
1862                                 /* Must have been "x-x" or "x+(-x)". */
1863                                 if (Is_rounding_mode(ROUNDMINUS))
1864                                         Sgl_setone_sign(resultp1);
1865                                 Sgl_copytoptr(resultp1,dstptr);
1866                                 return(NOEXCEPTION);
1867                         }
1868                         result_exponent--;
1869
1870                         /* Look to see if normalization is finished. */
1871                         if (Sgl_isone_hidden(resultp1)) {
1872                                 /* No further normalization is needed */
1873                                 goto round;
1874                         }
1875
1876                         /* Discover first one bit to determine shift amount.
1877                          * Use a modified binary search.  We have already
1878                          * shifted the result one position left and still
1879                          * not found a one so the remainder of the extension
1880                          * must be zero and simplifies rounding. */
1881                         /* Scan bytes */
1882                         while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883                                 Sglext_leftshiftby8(resultp1,resultp2);
1884                                 result_exponent -= 8;
1885                         }
1886                         /* Now narrow it down to the nibble */
1887                         if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888                                 /* The lower nibble contains the
1889                                  * normalizing one */
1890                                 Sglext_leftshiftby4(resultp1,resultp2);
1891                                 result_exponent -= 4;
1892                         }
1893                         /* Select case where first bit is set (already
1894                          * normalized) otherwise select the proper shift. */
1895                         jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896                         if (jumpsize <= 7) switch(jumpsize) {
1897                         case 1:
1898                                 Sglext_leftshiftby3(resultp1,resultp2);
1899                                 result_exponent -= 3;
1900                                 break;
1901                         case 2:
1902                         case 3:
1903                                 Sglext_leftshiftby2(resultp1,resultp2);
1904                                 result_exponent -= 2;
1905                                 break;
1906                         case 4:
1907                         case 5:
1908                         case 6:
1909                         case 7:
1910                                 Sglext_leftshiftby1(resultp1,resultp2);
1911                                 result_exponent -= 1;
1912                                 break;
1913                         }
1914                 } /* end if (hidden...)... */
1915         /* Fall through and round */
1916         } /* end if (save < 0)... */
1917         else {
1918                 /* Add magnitudes */
1919                 Sglext_addition(tmpresp1,tmpresp2,
1920                         rightp1,rightp2, /*to*/resultp1,resultp2);
1921                 sign_save = Sgl_signextendedsign(resultp1);
1922                 if (Sgl_isone_hiddenoverflow(resultp1)) {
1923                         /* Prenormalization required. */
1924                         Sglext_arithrightshiftby1(resultp1,resultp2);
1925                         result_exponent++;
1926                 } /* end if hiddenoverflow... */
1927         } /* end else ...add magnitudes... */
1928
1929         /* Round the result.  If the extension word (resultp2) is
1930          * all zeros, then the result is exact.  Otherwise round in the
1931          * correct direction.  Underflow is possible.  If a postnormalization
1932          * is necessary, then the mantissa is all zeros so no shift is needed.
1933          */
1934   round:
1935         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936                 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1937         }
1938         Sgl_set_sign(resultp1,/*using*/sign_save);
1939         if (Sglext_isnotzero_mantissap2(resultp2)) {
1940                 inexact = TRUE;
1941                 switch(Rounding_mode()) {
1942                 case ROUNDNEAREST: /* The default. */
1943                         if (Sglext_isone_highp2(resultp2)) {
1944                                 /* at least 1/2 ulp */
1945                                 if (Sglext_isnotzero_low31p2(resultp2) ||
1946                                     Sglext_isone_lowp1(resultp1)) {
1947                                         /* either exactly half way and odd or
1948                                          * more than 1/2ulp */
1949                                         Sgl_increment(resultp1);
1950                                 }
1951                         }
1952                         break;
1953
1954                 case ROUNDPLUS:
1955                         if (Sgl_iszero_sign(resultp1)) {
1956                                 /* Round up positive results */
1957                                 Sgl_increment(resultp1);
1958                         }
1959                         break;
1960
1961                 case ROUNDMINUS:
1962                         if (Sgl_isone_sign(resultp1)) {
1963                                 /* Round down negative results */
1964                                 Sgl_increment(resultp1);
1965                         }
1966
1967                 case ROUNDZERO:;
1968                         /* truncate is simple */
1969                 } /* end switch... */
1970                 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1971         }
1972         if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973                 /* Overflow */
1974                 if (Is_overflowtrap_enabled()) {
1975                         /*
1976                          * Adjust bias of result
1977                          */
1978                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979                         Sgl_copytoptr(resultp1,dstptr);
1980                         if (inexact)
1981                             if (Is_inexacttrap_enabled())
1982                                 return (OPC_2E_OVERFLOWEXCEPTION |
1983                                         OPC_2E_INEXACTEXCEPTION);
1984                             else Set_inexactflag();
1985                         return (OPC_2E_OVERFLOWEXCEPTION);
1986                 }
1987                 inexact = TRUE;
1988                 Set_overflowflag();
1989                 Sgl_setoverflow(resultp1);
1990         } else if (result_exponent <= 0) {      /* underflow case */
1991                 if (Is_underflowtrap_enabled()) {
1992                         /*
1993                          * Adjust bias of result
1994                          */
1995                         Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996                         Sgl_copytoptr(resultp1,dstptr);
1997                         if (inexact)
1998                             if (Is_inexacttrap_enabled())
1999                                 return (OPC_2E_UNDERFLOWEXCEPTION |
2000                                         OPC_2E_INEXACTEXCEPTION);
2001                             else Set_inexactflag();
2002                         return(OPC_2E_UNDERFLOWEXCEPTION);
2003                 }
2004                 else if (inexact && is_tiny) Set_underflowflag();
2005         }
2006         else Sgl_set_exponent(resultp1,result_exponent);
2007         Sgl_copytoptr(resultp1,dstptr);
2008         if (inexact)
2009                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010                 else Set_inexactflag();
2011         return(NOEXCEPTION);
2012 }
2013
2014 /*
2015  *  Single Floating-point Multiply Negate Fused Add
2016  */
2017
2018 sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2019
2020 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021 unsigned int *status;
2022 {
2023         unsigned int opnd1, opnd2, opnd3;
2024         register unsigned int tmpresp1, tmpresp2;
2025         unsigned int rightp1, rightp2;
2026         unsigned int resultp1, resultp2 = 0;
2027         register int mpy_exponent, add_exponent, count;
2028         boolean inexact = FALSE, is_tiny = FALSE;
2029
2030         unsigned int signlessleft1, signlessright1, save;
2031         register int result_exponent, diff_exponent;
2032         int sign_save, jumpsize;
2033
2034         Sgl_copyfromptr(src1ptr,opnd1);
2035         Sgl_copyfromptr(src2ptr,opnd2);
2036         Sgl_copyfromptr(src3ptr,opnd3);
2037
2038         /*
2039          * set sign bit of result of multiply
2040          */
2041         if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2042                 Sgl_setzero(resultp1);
2043         else
2044                 Sgl_setnegativezero(resultp1);
2045
2046         /*
2047          * Generate multiply exponent
2048          */
2049         mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2050
2051         /*
2052          * check first operand for NaN's or infinity
2053          */
2054         if (Sgl_isinfinity_exponent(opnd1)) {
2055                 if (Sgl_iszero_mantissa(opnd1)) {
2056                         if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057                                 if (Sgl_iszero_exponentmantissa(opnd2)) {
2058                                         /*
2059                                          * invalid since operands are infinity
2060                                          * and zero
2061                                          */
2062                                         if (Is_invalidtrap_enabled())
2063                                                 return(OPC_2E_INVALIDEXCEPTION);
2064                                         Set_invalidflag();
2065                                         Sgl_makequietnan(resultp1);
2066                                         Sgl_copytoptr(resultp1,dstptr);
2067                                         return(NOEXCEPTION);
2068                                 }
2069                                 /*
2070                                  * Check third operand for infinity with a
2071                                  *  sign opposite of the multiply result
2072                                  */
2073                                 if (Sgl_isinfinity(opnd3) &&
2074                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2075                                         /*
2076                                          * invalid since attempting a magnitude
2077                                          * subtraction of infinities
2078                                          */
2079                                         if (Is_invalidtrap_enabled())
2080                                                 return(OPC_2E_INVALIDEXCEPTION);
2081                                         Set_invalidflag();
2082                                         Sgl_makequietnan(resultp1);
2083                                         Sgl_copytoptr(resultp1,dstptr);
2084                                         return(NOEXCEPTION);
2085                                 }
2086
2087                                 /*
2088                                  * return infinity
2089                                  */
2090                                 Sgl_setinfinity_exponentmantissa(resultp1);
2091                                 Sgl_copytoptr(resultp1,dstptr);
2092                                 return(NOEXCEPTION);
2093                         }
2094                 }
2095                 else {
2096                         /*
2097                          * is NaN; signaling or quiet?
2098                          */
2099                         if (Sgl_isone_signaling(opnd1)) {
2100                                 /* trap if INVALIDTRAP enabled */
2101                                 if (Is_invalidtrap_enabled())
2102                                         return(OPC_2E_INVALIDEXCEPTION);
2103                                 /* make NaN quiet */
2104                                 Set_invalidflag();
2105                                 Sgl_set_quiet(opnd1);
2106                         }
2107                         /*
2108                          * is second operand a signaling NaN?
2109                          */
2110                         else if (Sgl_is_signalingnan(opnd2)) {
2111                                 /* trap if INVALIDTRAP enabled */
2112                                 if (Is_invalidtrap_enabled())
2113                                         return(OPC_2E_INVALIDEXCEPTION);
2114                                 /* make NaN quiet */
2115                                 Set_invalidflag();
2116                                 Sgl_set_quiet(opnd2);
2117                                 Sgl_copytoptr(opnd2,dstptr);
2118                                 return(NOEXCEPTION);
2119                         }
2120                         /*
2121                          * is third operand a signaling NaN?
2122                          */
2123                         else if (Sgl_is_signalingnan(opnd3)) {
2124                                 /* trap if INVALIDTRAP enabled */
2125                                 if (Is_invalidtrap_enabled())
2126                                         return(OPC_2E_INVALIDEXCEPTION);
2127                                 /* make NaN quiet */
2128                                 Set_invalidflag();
2129                                 Sgl_set_quiet(opnd3);
2130                                 Sgl_copytoptr(opnd3,dstptr);
2131                                 return(NOEXCEPTION);
2132                         }
2133                         /*
2134                          * return quiet NaN
2135                          */
2136                         Sgl_copytoptr(opnd1,dstptr);
2137                         return(NOEXCEPTION);
2138                 }
2139         }
2140
2141         /*
2142          * check second operand for NaN's or infinity
2143          */
2144         if (Sgl_isinfinity_exponent(opnd2)) {
2145                 if (Sgl_iszero_mantissa(opnd2)) {
2146                         if (Sgl_isnotnan(opnd3)) {
2147                                 if (Sgl_iszero_exponentmantissa(opnd1)) {
2148                                         /*
2149                                          * invalid since multiply operands are
2150                                          * zero & infinity
2151                                          */
2152                                         if (Is_invalidtrap_enabled())
2153                                                 return(OPC_2E_INVALIDEXCEPTION);
2154                                         Set_invalidflag();
2155                                         Sgl_makequietnan(opnd2);
2156                                         Sgl_copytoptr(opnd2,dstptr);
2157                                         return(NOEXCEPTION);
2158                                 }
2159
2160                                 /*
2161                                  * Check third operand for infinity with a
2162                                  *  sign opposite of the multiply result
2163                                  */
2164                                 if (Sgl_isinfinity(opnd3) &&
2165                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2166                                         /*
2167                                          * invalid since attempting a magnitude
2168                                          * subtraction of infinities
2169                                          */
2170                                         if (Is_invalidtrap_enabled())
2171                                                 return(OPC_2E_INVALIDEXCEPTION);
2172                                         Set_invalidflag();
2173                                         Sgl_makequietnan(resultp1);
2174                                         Sgl_copytoptr(resultp1,dstptr);
2175                                         return(NOEXCEPTION);
2176                                 }
2177
2178                                 /*
2179                                  * return infinity
2180                                  */
2181                                 Sgl_setinfinity_exponentmantissa(resultp1);
2182                                 Sgl_copytoptr(resultp1,dstptr);
2183                                 return(NOEXCEPTION);
2184                         }
2185                 }
2186                 else {
2187                         /*
2188                          * is NaN; signaling or quiet?
2189                          */
2190                         if (Sgl_isone_signaling(opnd2)) {
2191                                 /* trap if INVALIDTRAP enabled */
2192                                 if (Is_invalidtrap_enabled())
2193                                         return(OPC_2E_INVALIDEXCEPTION);
2194                                 /* make NaN quiet */
2195                                 Set_invalidflag();
2196                                 Sgl_set_quiet(opnd2);
2197                         }
2198                         /*
2199                          * is third operand a signaling NaN?
2200                          */
2201                         else if (Sgl_is_signalingnan(opnd3)) {
2202                                 /* trap if INVALIDTRAP enabled */
2203                                 if (Is_invalidtrap_enabled())
2204                                                 return(OPC_2E_INVALIDEXCEPTION);
2205                                 /* make NaN quiet */
2206                                 Set_invalidflag();
2207                                 Sgl_set_quiet(opnd3);
2208                                 Sgl_copytoptr(opnd3,dstptr);
2209                                 return(NOEXCEPTION);
2210                         }
2211                         /*
2212                          * return quiet NaN
2213                          */
2214                         Sgl_copytoptr(opnd2,dstptr);
2215                         return(NOEXCEPTION);
2216                 }
2217         }
2218
2219         /*
2220          * check third operand for NaN's or infinity
2221          */
2222         if (Sgl_isinfinity_exponent(opnd3)) {
2223                 if (Sgl_iszero_mantissa(opnd3)) {
2224                         /* return infinity */
2225                         Sgl_copytoptr(opnd3,dstptr);
2226                         return(NOEXCEPTION);
2227                 } else {
2228                         /*
2229                          * is NaN; signaling or quiet?
2230                          */
2231                         if (Sgl_isone_signaling(opnd3)) {
2232                                 /* trap if INVALIDTRAP enabled */
2233                                 if (Is_invalidtrap_enabled())
2234                                         return(OPC_2E_INVALIDEXCEPTION);
2235                                 /* make NaN quiet */
2236                                 Set_invalidflag();
2237                                 Sgl_set_quiet(opnd3);
2238                         }
2239                         /*
2240                          * return quiet NaN
2241                          */
2242                         Sgl_copytoptr(opnd3,dstptr);
2243                         return(NOEXCEPTION);
2244                 }
2245         }
2246
2247         /*
2248          * Generate multiply mantissa
2249          */
2250         if (Sgl_isnotzero_exponent(opnd1)) {
2251                 /* set hidden bit */
2252                 Sgl_clear_signexponent_set_hidden(opnd1);
2253         }
2254         else {
2255                 /* check for zero */
2256                 if (Sgl_iszero_mantissa(opnd1)) {
2257                         /*
2258                          * Perform the add opnd3 with zero here.
2259                          */
2260                         if (Sgl_iszero_exponentmantissa(opnd3)) {
2261                                 if (Is_rounding_mode(ROUNDMINUS)) {
2262                                         Sgl_or_signs(opnd3,resultp1);
2263                                 } else {
2264                                         Sgl_and_signs(opnd3,resultp1);
2265                                 }
2266                         }
2267                         /*
2268                          * Now let's check for trapped underflow case.
2269                          */
2270                         else if (Sgl_iszero_exponent(opnd3) &&
2271                                  Is_underflowtrap_enabled()) {
2272                                 /* need to normalize results mantissa */
2273                                 sign_save = Sgl_signextendedsign(opnd3);
2274                                 result_exponent = 0;
2275                                 Sgl_leftshiftby1(opnd3);
2276                                 Sgl_normalize(opnd3,result_exponent);
2277                                 Sgl_set_sign(opnd3,/*using*/sign_save);
2278                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
2279                                                         unfl);
2280                                 Sgl_copytoptr(opnd3,dstptr);
2281                                 /* inexact = FALSE */
2282                                 return(OPC_2E_UNDERFLOWEXCEPTION);
2283                         }
2284                         Sgl_copytoptr(opnd3,dstptr);
2285                         return(NOEXCEPTION);
2286                 }
2287                 /* is denormalized, adjust exponent */
2288                 Sgl_clear_signexponent(opnd1);
2289                 Sgl_leftshiftby1(opnd1);
2290                 Sgl_normalize(opnd1,mpy_exponent);
2291         }
2292         /* opnd2 needs to have hidden bit set with msb in hidden bit */
2293         if (Sgl_isnotzero_exponent(opnd2)) {
2294                 Sgl_clear_signexponent_set_hidden(opnd2);
2295         }
2296         else {
2297                 /* check for zero */
2298                 if (Sgl_iszero_mantissa(opnd2)) {
2299                         /*
2300                          * Perform the add opnd3 with zero here.
2301                          */
2302                         if (Sgl_iszero_exponentmantissa(opnd3)) {
2303                                 if (Is_rounding_mode(ROUNDMINUS)) {
2304                                         Sgl_or_signs(opnd3,resultp1);
2305                                 } else {
2306                                         Sgl_and_signs(opnd3,resultp1);
2307                                 }
2308                         }
2309                         /*
2310                          * Now let's check for trapped underflow case.
2311                          */
2312                         else if (Sgl_iszero_exponent(opnd3) &&
2313                             Is_underflowtrap_enabled()) {
2314                                 /* need to normalize results mantissa */
2315                                 sign_save = Sgl_signextendedsign(opnd3);
2316                                 result_exponent = 0;
2317                                 Sgl_leftshiftby1(opnd3);
2318                                 Sgl_normalize(opnd3,result_exponent);
2319                                 Sgl_set_sign(opnd3,/*using*/sign_save);
2320                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
2321                                                         unfl);
2322                                 Sgl_copytoptr(opnd3,dstptr);
2323                                 /* inexact = FALSE */
2324                                 return(OPC_2E_UNDERFLOWEXCEPTION);
2325                         }
2326                         Sgl_copytoptr(opnd3,dstptr);
2327                         return(NOEXCEPTION);
2328                 }
2329                 /* is denormalized; want to normalize */
2330                 Sgl_clear_signexponent(opnd2);
2331                 Sgl_leftshiftby1(opnd2);
2332                 Sgl_normalize(opnd2,mpy_exponent);
2333         }
2334
2335         /* Multiply the first two source mantissas together */
2336
2337         /*
2338          * The intermediate result will be kept in tmpres,
2339          * which needs enough room for 48 bits of mantissa,
2340          * so let's call it a Single extended.
2341          */
2342         Sglext_setzero(tmpresp1,tmpresp2);
2343
2344         /*
2345          * Four bits at a time are inspected in each loop, and a
2346          * simple shift and add multiply algorithm is used.
2347          */
2348         for (count = SGL_P-1; count >= 0; count -= 4) {
2349                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350                 if (Sbit28(opnd1)) {
2351                         /* Twoword_add should be an ADD followed by 2 ADDC's */
2352                         Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2353                 }
2354                 if (Sbit29(opnd1)) {
2355                         Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2356                 }
2357                 if (Sbit30(opnd1)) {
2358                         Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2359                 }
2360                 if (Sbit31(opnd1)) {
2361                         Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2362                 }
2363                 Sgl_rightshiftby4(opnd1);
2364         }
2365         if (Is_sexthiddenoverflow(tmpresp1)) {
2366                 /* result mantissa >= 2 (mantissa overflow) */
2367                 mpy_exponent++;
2368                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369         } else {
2370                 Sglext_rightshiftby3(tmpresp1,tmpresp2);
2371         }
2372
2373         /*
2374          * Restore the sign of the mpy result which was saved in resultp1.
2375          * The exponent will continue to be kept in mpy_exponent.
2376          */
2377         Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2378
2379         /*
2380          * No rounding is required, since the result of the multiply
2381          * is exact in the extended format.
2382          */
2383
2384         /*
2385          * Now we are ready to perform the add portion of the operation.
2386          *
2387          * The exponents need to be kept as integers for now, since the
2388          * multiply result might not fit into the exponent field.  We
2389          * can't overflow or underflow because of this yet, since the
2390          * add could bring the final result back into range.
2391          */
2392         add_exponent = Sgl_exponent(opnd3);
2393
2394         /*
2395          * Check for denormalized or zero add operand.
2396          */
2397         if (add_exponent == 0) {
2398                 /* check for zero */
2399                 if (Sgl_iszero_mantissa(opnd3)) {
2400                         /* right is zero */
2401                         /* Left can't be zero and must be result.
2402                          *
2403                          * The final result is now in tmpres and mpy_exponent,
2404                          * and needs to be rounded and squeezed back into
2405                          * single precision format from single extended.
2406                          */
2407                         result_exponent = mpy_exponent;
2408                         Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409                         sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410                         goto round;
2411                 }
2412
2413                 /*
2414                  * Neither are zeroes.
2415                  * Adjust exponent and normalize add operand.
2416                  */
2417                 sign_save = Sgl_signextendedsign(opnd3);        /* save sign */
2418                 Sgl_clear_signexponent(opnd3);
2419                 Sgl_leftshiftby1(opnd3);
2420                 Sgl_normalize(opnd3,add_exponent);
2421                 Sgl_set_sign(opnd3,sign_save);          /* restore sign */
2422         } else {
2423                 Sgl_clear_exponent_set_hidden(opnd3);
2424         }
2425         /*
2426          * Copy opnd3 to the single extended variable called right.
2427          */
2428         Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2429
2430         /*
2431          * A zero "save" helps discover equal operands (for later),
2432          * and is used in swapping operands (if needed).
2433          */
2434         Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2435
2436         /*
2437          * Compare magnitude of operands.
2438          */
2439         Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440         Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2442             Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2443                 /*
2444                  * Set the left operand to the larger one by XOR swap.
2445                  * First finish the first word "save".
2446                  */
2447                 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448                 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449                 Sglext_swap_lower(tmpresp2,rightp2);
2450                 /* also setup exponents used in rest of routine */
2451                 diff_exponent = add_exponent - mpy_exponent;
2452                 result_exponent = add_exponent;
2453         } else {
2454                 /* also setup exponents used in rest of routine */
2455                 diff_exponent = mpy_exponent - add_exponent;
2456                 result_exponent = mpy_exponent;
2457         }
2458         /* Invariant: left is not smaller than right. */
2459
2460         /*
2461          * Special case alignment of operands that would force alignment
2462          * beyond the extent of the extension.  A further optimization
2463          * could special case this but only reduces the path length for
2464          * this infrequent case.
2465          */
2466         if (diff_exponent > SGLEXT_THRESHOLD) {
2467                 diff_exponent = SGLEXT_THRESHOLD;
2468         }
2469
2470         /* Align right operand by shifting it to the right */
2471         Sglext_clear_sign(rightp1);
2472         Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2473
2474         /* Treat sum and difference of the operands separately. */
2475         if ((int)save < 0) {
2476                 /*
2477                  * Difference of the two operands.  Overflow can occur if the
2478                  * multiply overflowed.  A borrow can occur out of the hidden
2479                  * bit and force a post normalization phase.
2480                  */
2481                 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482                         resultp1,resultp2);
2483                 sign_save = Sgl_signextendedsign(resultp1);
2484                 if (Sgl_iszero_hidden(resultp1)) {
2485                         /* Handle normalization */
2486                 /* A straightforward algorithm would now shift the
2487                  * result and extension left until the hidden bit
2488                  * becomes one.  Not all of the extension bits need
2489                  * participate in the shift.  Only the two most
2490                  * significant bits (round and guard) are needed.
2491                  * If only a single shift is needed then the guard
2492                  * bit becomes a significant low order bit and the
2493                  * extension must participate in the rounding.
2494                  * If more than a single shift is needed, then all
2495                  * bits to the right of the guard bit are zeros,
2496                  * and the guard bit may or may not be zero. */
2497                         Sglext_leftshiftby1(resultp1,resultp2);
2498
2499                         /* Need to check for a zero result.  The sign and
2500                          * exponent fields have already been zeroed.  The more
2501                          * efficient test of the full object can be used.
2502                          */
2503                          if (Sglext_iszero(resultp1,resultp2)) {
2504                                 /* Must have been "x-x" or "x+(-x)". */
2505                                 if (Is_rounding_mode(ROUNDMINUS))
2506                                         Sgl_setone_sign(resultp1);
2507                                 Sgl_copytoptr(resultp1,dstptr);
2508                                 return(NOEXCEPTION);
2509                         }
2510                         result_exponent--;
2511
2512                         /* Look to see if normalization is finished. */
2513                         if (Sgl_isone_hidden(resultp1)) {
2514                                 /* No further normalization is needed */
2515                                 goto round;
2516                         }
2517
2518                         /* Discover first one bit to determine shift amount.
2519                          * Use a modified binary search.  We have already
2520                          * shifted the result one position left and still
2521                          * not found a one so the remainder of the extension
2522                          * must be zero and simplifies rounding. */
2523                         /* Scan bytes */
2524                         while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525                                 Sglext_leftshiftby8(resultp1,resultp2);
2526                                 result_exponent -= 8;
2527                         }
2528                         /* Now narrow it down to the nibble */
2529                         if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530                                 /* The lower nibble contains the
2531                                  * normalizing one */
2532                                 Sglext_leftshiftby4(resultp1,resultp2);
2533                                 result_exponent -= 4;
2534                         }
2535                         /* Select case where first bit is set (already
2536                          * normalized) otherwise select the proper shift. */
2537                         jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538                         if (jumpsize <= 7) switch(jumpsize) {
2539                         case 1:
2540                                 Sglext_leftshiftby3(resultp1,resultp2);
2541                                 result_exponent -= 3;
2542                                 break;
2543                         case 2:
2544                         case 3:
2545                                 Sglext_leftshiftby2(resultp1,resultp2);
2546                                 result_exponent -= 2;
2547                                 break;
2548                         case 4:
2549                         case 5:
2550                         case 6:
2551                         case 7:
2552                                 Sglext_leftshiftby1(resultp1,resultp2);
2553                                 result_exponent -= 1;
2554                                 break;
2555                         }
2556                 } /* end if (hidden...)... */
2557         /* Fall through and round */
2558         } /* end if (save < 0)... */
2559         else {
2560                 /* Add magnitudes */
2561                 Sglext_addition(tmpresp1,tmpresp2,
2562                         rightp1,rightp2, /*to*/resultp1,resultp2);
2563                 sign_save = Sgl_signextendedsign(resultp1);
2564                 if (Sgl_isone_hiddenoverflow(resultp1)) {
2565                         /* Prenormalization required. */
2566                         Sglext_arithrightshiftby1(resultp1,resultp2);
2567                         result_exponent++;
2568                 } /* end if hiddenoverflow... */
2569         } /* end else ...add magnitudes... */
2570
2571         /* Round the result.  If the extension word is all zeros,
2572          * then the result is exact.  Otherwise round in the
2573          * correct direction.  Underflow is possible. If a postnormalization
2574          * is necessary, then the mantissa is all zeros so no shift is needed.
2575          */
2576   round:
2577         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578                 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2579         }
2580         Sgl_set_sign(resultp1,/*using*/sign_save);
2581         if (Sglext_isnotzero_mantissap2(resultp2)) {
2582                 inexact = TRUE;
2583                 switch(Rounding_mode()) {
2584                 case ROUNDNEAREST: /* The default. */
2585                         if (Sglext_isone_highp2(resultp2)) {
2586                                 /* at least 1/2 ulp */
2587                                 if (Sglext_isnotzero_low31p2(resultp2) ||
2588                                     Sglext_isone_lowp1(resultp1)) {
2589                                         /* either exactly half way and odd or
2590                                          * more than 1/2ulp */
2591                                         Sgl_increment(resultp1);
2592                                 }
2593                         }
2594                         break;
2595
2596                 case ROUNDPLUS:
2597                         if (Sgl_iszero_sign(resultp1)) {
2598                                 /* Round up positive results */
2599                                 Sgl_increment(resultp1);
2600                         }
2601                         break;
2602
2603                 case ROUNDMINUS:
2604                         if (Sgl_isone_sign(resultp1)) {
2605                                 /* Round down negative results */
2606                                 Sgl_increment(resultp1);
2607                         }
2608
2609                 case ROUNDZERO:;
2610                         /* truncate is simple */
2611                 } /* end switch... */
2612                 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2613         }
2614         if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615                 /* Overflow */
2616                 if (Is_overflowtrap_enabled()) {
2617                         /*
2618                          * Adjust bias of result
2619                          */
2620                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621                         Sgl_copytoptr(resultp1,dstptr);
2622                         if (inexact)
2623                             if (Is_inexacttrap_enabled())
2624                                 return (OPC_2E_OVERFLOWEXCEPTION |
2625                                         OPC_2E_INEXACTEXCEPTION);
2626                             else Set_inexactflag();
2627                         return (OPC_2E_OVERFLOWEXCEPTION);
2628                 }
2629                 inexact = TRUE;
2630                 Set_overflowflag();
2631                 Sgl_setoverflow(resultp1);
2632         } else if (result_exponent <= 0) {      /* underflow case */
2633                 if (Is_underflowtrap_enabled()) {
2634                         /*
2635                          * Adjust bias of result
2636                          */
2637                         Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638                         Sgl_copytoptr(resultp1,dstptr);
2639                         if (inexact)
2640                             if (Is_inexacttrap_enabled())
2641                                 return (OPC_2E_UNDERFLOWEXCEPTION |
2642                                         OPC_2E_INEXACTEXCEPTION);
2643                             else Set_inexactflag();
2644                         return(OPC_2E_UNDERFLOWEXCEPTION);
2645                 }
2646                 else if (inexact && is_tiny) Set_underflowflag();
2647         }
2648         else Sgl_set_exponent(resultp1,result_exponent);
2649         Sgl_copytoptr(resultp1,dstptr);
2650         if (inexact)
2651                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652                 else Set_inexactflag();
2653         return(NOEXCEPTION);
2654 }
2655