sys/arch/hppa/spmath/dfadd.c

   1 /*      $NetBSD: dfadd.c,v 1.3 2005/12/11 12:17:40 christos Exp $       */
   2
   3 /*      $OpenBSD: dfadd.c,v 1.4 2001/03/29 03:58:17 mickey Exp $        */
   4
   5 /*
   6  * Copyright 1996 1995 by Open Software Foundation, Inc.
   7  *              All Rights Reserved
   8  *
   9  * Permission to use, copy, modify, and distribute this software and
  10  * its documentation for any purpose and without fee is hereby granted,
  11  * provided that the above copyright notice appears in all copies and
  12  * that both the copyright notice and this permission notice appear in
  13  * supporting documentation.
  14  *
  15  * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
  16  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  17  * FOR A PARTICULAR PURPOSE.
  18  *
  19  * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
  20  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  21  * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
  22  * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
  23  * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  24  */
  25 /*
  26  * pmk1.1
  27  */
  28 /*
  29  * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
  30  *
  31  * To anyone who acknowledges that this file is provided "AS IS"
  32  * without any express or implied warranty:
  33  *     permission to use, copy, modify, and distribute this file
  34  * for any purpose is hereby granted without fee, provided that
  35  * the above copyright notice and this notice appears in all
  36  * copies, and that the name of Hewlett-Packard Company not be
  37  * used in advertising or publicity pertaining to distribution
  38  * of the software without specific, written prior permission.
  39  * Hewlett-Packard Company makes no representations about the
  40  * suitability of this software for any purpose.
  41  */
  42
  43 #include <sys/cdefs.h>
  44 __KERNEL_RCSID(0, "$NetBSD: dfadd.c,v 1.3 2005/12/11 12:17:40 christos Exp $");
  45
  46 #include "../spmath/float.h"
  47 #include "../spmath/dbl_float.h"
  48
  49 /*
  50  * Double_add: add two double precision values.
      *
      * The operands are read through leftptr and rightptr and the result is
      * stored through dstptr.  `status' is never named directly in this body;
      * it is presumably what the trap/flag/rounding macros used below
      * (Is_*trap_enabled(), Set_*flag(), Rounding_mode(), ...) operate on --
      * TODO confirm against float.h.
      *
      * Returns NOEXCEPTION, or one of INVALIDEXCEPTION, UNDERFLOWEXCEPTION,
      * OVERFLOWEXCEPTION, INEXACTEXCEPTION or
      * (OVERFLOWEXCEPTION | INEXACTEXCEPTION).  Every INVALIDEXCEPTION return
      * happens before anything is stored through dstptr; every other return
      * is preceded by a Dbl_copytoptr() of the result (with the exponent
      * passed through Dbl_setwrapped_exponent() on the trapped underflow and
      * overflow paths).
      *
      * Outline: NaN/infinity screening of each operand, ordering so that
      * left has the larger magnitude, zero/denormalized special cases,
      * alignment of the right operand, add or subtract of the magnitudes,
      * post-normalization (subtract only), rounding, overflow check.
  51  */
  52 int
  53 dbl_fadd(leftptr, rightptr, dstptr, status)
  54     dbl_floating_point *leftptr, *rightptr, *dstptr;
  55     unsigned int *status;
  56     {
  57     register unsigned int signless_upper_left, signless_upper_right, save;
  58     register unsigned int leftp1, leftp2, rightp1, rightp2, extent;
  59     register unsigned int resultp1 = 0, resultp2 = 0;
  60
  61     register int result_exponent, right_exponent, diff_exponent;
  62     register int sign_save, jumpsize;
  63     register int inexact = false;
  64     register int underflowtrap;
  65
  66     /* Create local copies of the numbers */
  67     Dbl_copyfromptr(leftptr,leftp1,leftp2);
  68     Dbl_copyfromptr(rightptr,rightp1,rightp2);
  69
  70     /* A zero "save" helps discover equal operands (for later), *
  71      * and is used in swapping operands (if needed).            */
         /* "save" is also tested as a signed int further down
          * ((int) save < 0) to choose the subtract paths over the add
          * paths. */
  72     Dbl_xortointp1(leftp1,rightp1,/*to*/save);
  73
  74     /*
  75      * check first operand for NaN's or infinity
  76      */
  77     if ((result_exponent = Dbl_exponent(leftp1)) == DBL_INFINITY_EXPONENT)
  78         {
  79         if (Dbl_iszero_mantissa(leftp1,leftp2))
  80             {
                 /* Left is an infinity.  If right is a NaN nothing is
                  * returned here; control leaves this block and the NaN is
                  * handled by the second-operand check below. */
  81             if (Dbl_isnotnan(rightp1,rightp2))
  82                 {
  83                 if (Dbl_isinfinity(rightp1,rightp2) && save!=0)
  84                     {
  85                     /*
  86                      * invalid since operands are opposite signed infinities
  87                      */
  88                     if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
  89                     Set_invalidflag();
  90                     Dbl_makequietnan(resultp1,resultp2);
  91                     Dbl_copytoptr(resultp1,resultp2,dstptr);
  92                     return(NOEXCEPTION);
  93                     }
  94                 /*
  95                  * return infinity
  96                  */
  97                 Dbl_copytoptr(leftp1,leftp2,dstptr);
  98                 return(NOEXCEPTION);
  99                 }
 100             }
 101         else
 102             {
 103             /*
 104              * is NaN; signaling or quiet?
 105              */
 106             if (Dbl_isone_signaling(leftp1))
 107                 {
 108                 /* trap if INVALIDTRAP enabled */
 109                 if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
 110                 /* make NaN quiet */
 111                 Set_invalidflag();
 112                 Dbl_set_quiet(leftp1);
                     /* continues at "return quiet NaN" below with the
                      * quieted left operand */
 113             }
 114             /*
 115              * is second operand a signaling NaN?
 116              */
 117             else if (Dbl_is_signalingnan(rightp1))
 118                 {
 119                 /* trap if INVALIDTRAP enabled */
 120                 if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
 121                 /* make NaN quiet */
 122                 Set_invalidflag();
 123                 Dbl_set_quiet(rightp1);
 124                 Dbl_copytoptr(rightp1,rightp2,dstptr);
 125                 return(NOEXCEPTION);
 126                 }
 127             /*
 128              * return quiet NaN
 129              */
 130             Dbl_copytoptr(leftp1,leftp2,dstptr);
 131             return(NOEXCEPTION);
 132             }
 133         } /* End left NaN or Infinity processing */
 134     /*
 135      * check second operand for NaN's or infinity
      *
      * Reached when left is finite, or when left is an infinity and right
      * is a NaN (see above).
 136      */
 137     if (Dbl_isinfinity_exponent(rightp1))
 138         {
 139         if (Dbl_iszero_mantissa(rightp1,rightp2))
 140             {
 141             /* return infinity */
 142             Dbl_copytoptr(rightp1,rightp2,dstptr);
 143             return(NOEXCEPTION);
 144             }
 145         /*
 146          * is NaN; signaling or quiet?
 147          */
 148         if (Dbl_isone_signaling(rightp1))
 149             {
 150             /* trap if INVALIDTRAP enabled */
 151             if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
 152             /* make NaN quiet */
 153             Set_invalidflag();
 154             Dbl_set_quiet(rightp1);
 155             }
 156         /*
 157          * return quiet NaN
 158          */
 159         Dbl_copytoptr(rightp1,rightp2,dstptr);
 160         return(NOEXCEPTION);
 161         } /* End right NaN or Infinity processing */
 162
 163     /* Invariant: Must be dealing with finite numbers */
 164
 165     /* Compare operands by removing the sign */
 166     Dbl_copytoint_exponentmantissap1(leftp1,signless_upper_left);
 167     Dbl_copytoint_exponentmantissap1(rightp1,signless_upper_right);
 168
 169     /* sign difference selects add or sub operation. */
 170     if(Dbl_ismagnitudeless(leftp2,rightp2,signless_upper_left,signless_upper_right))
 171         {
 172         /* Set the left operand to the larger one by XOR swap   *
 173          *  First finish the first word using "save"            */
 174         Dbl_xorfromintp1(save,rightp1,/*to*/rightp1);
 175         Dbl_xorfromintp1(save,leftp1,/*to*/leftp1);
 176         Dbl_swap_lower(leftp2,rightp2);
             /* left has changed, so its exponent is re-read */
 177         result_exponent = Dbl_exponent(leftp1);
 178         }
 179     /* Invariant:  left is not smaller than right. */
 180
 181     if((right_exponent = Dbl_exponent(rightp1)) == 0)
 182         {
 183         /* Denormalized operands.  First look for zeroes */
 184         if(Dbl_iszero_mantissa(rightp1,rightp2))
 185             {
 186             /* right is zero */
 187             if(Dbl_iszero_exponentmantissa(leftp1,leftp2))
 188                 {
 189                 /* Both operands are zeros */
                     /* Only the sign of the zero result is in question:
                      * going by the macro names, the operand signs are
                      * ORed when rounding toward minus and ANDed in every
                      * other mode. */
 190                 if(Is_rounding_mode(ROUNDMINUS))
 191                     {
 192                     Dbl_or_signs(leftp1,/*with*/rightp1);
 193                     }
 194                 else
 195                     {
 196                     Dbl_and_signs(leftp1,/*with*/rightp1);
 197                     }
 198                 }
 199             else
 200                 {
 201                 /* Left is not a zero and must be the result.  Trapped
 202                  * underflows are signaled if left is denormalized.  Result
 203                  * is always exact. */
 204                 if( (result_exponent == 0) && Is_underflowtrap_enabled() )
 205                     {
 206                     /* need to normalize results mantissa */
 207                     sign_save = Dbl_signextendedsign(leftp1);
 208                     Dbl_leftshiftby1(leftp1,leftp2);
 209                     Dbl_normalize(leftp1,leftp2,result_exponent);
 210                     Dbl_set_sign(leftp1,/*using*/sign_save);
 211                     Dbl_setwrapped_exponent(leftp1,result_exponent,unfl);
 212                     Dbl_copytoptr(leftp1,leftp2,dstptr);
 213                     /* inexact = false */
 214                     return(UNDERFLOWEXCEPTION);
 215                     }
 216                 }
                 /* Common exit for "right is zero": left (with its sign
                  * possibly adjusted above) is the result. */
 217             Dbl_copytoptr(leftp1,leftp2,dstptr);
 218             return(NOEXCEPTION);
 219             }
 220
 221         /* Neither are zeroes */
 222         Dbl_clear_sign(rightp1);        /* Exponent is already cleared */
 223         if(result_exponent == 0 )
 224             {
 225             /* Both operands are denormalized.  The result must be exact
 226              * and is simply calculated.  A sum could become normalized and a
 227              * difference could cancel to a true zero. */
 228             if( (/*signed*/int) save < 0 )
 229                 {
 230                 Dbl_subtract(leftp1,leftp2,/*minus*/rightp1,rightp2,
 231                 /*into*/resultp1,resultp2);
 232                 if(Dbl_iszero_mantissa(resultp1,resultp2))
 233                     {
 234                     if(Is_rounding_mode(ROUNDMINUS))
 235                         {
 236                         Dbl_setone_sign(resultp1);
 237                         }
 238                     else
 239                         {
 240                         Dbl_setzero_sign(resultp1);
 241                         }
 242                     Dbl_copytoptr(resultp1,resultp2,dstptr);
 243                     return(NOEXCEPTION);
 244                     }
 245                 }
 246             else
 247                 {
 248                 Dbl_addition(leftp1,leftp2,rightp1,rightp2,
 249                 /*into*/resultp1,resultp2);
                     /* A hidden bit in the sum means it became normalized,
                      * so it is returned without any underflow handling. */
 250                 if(Dbl_isone_hidden(resultp1))
 251                     {
 252                     Dbl_copytoptr(resultp1,resultp2,dstptr);
 253                     return(NOEXCEPTION);
 254                     }
 255                 }
                 /* Still-denormalized, non-zero result from either branch. */
 256             if(Is_underflowtrap_enabled())
 257                 {
 258                 /* need to normalize result */
 259                 sign_save = Dbl_signextendedsign(resultp1);
 260                 Dbl_leftshiftby1(resultp1,resultp2);
 261                 Dbl_normalize(resultp1,resultp2,result_exponent);
 262                 Dbl_set_sign(resultp1,/*using*/sign_save);
 263                 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
 264                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 265                 /* inexact = false */
 266                 return(UNDERFLOWEXCEPTION);
 267                 }
 268             Dbl_copytoptr(resultp1,resultp2,dstptr);
 269             return(NOEXCEPTION);
 270             }
 271         right_exponent = 1;     /* Set exponent to reflect different bias
 272                                  * with denormalized numbers. */
 273         }
 274     else
 275         {
 276         Dbl_clear_signexponent_set_hidden(rightp1);
 277         }
 278     Dbl_clear_exponent_set_hidden(leftp1);
 279     diff_exponent = result_exponent - right_exponent;
 280
 281     /*
 282      * Special case alignment of operands that would force alignment
 283      * beyond the extent of the extension.  A further optimization
 284      * could special case this but only reduces the path length for this
 285      * infrequent case.
      *
      * The shift count handed to Dbl_right_align() is capped at
      * DBL_THRESHOLD.
 286      */
 287     if(diff_exponent > DBL_THRESHOLD)
 288         {
 289         diff_exponent = DBL_THRESHOLD;
 290         }
 291
 292     /* Align right operand by shifting to right */
 293     Dbl_right_align(/*operand*/rightp1,rightp2,/*shifted by*/diff_exponent,
 294     /*and lower to*/extent);
 295
 296     /* Treat sum and difference of the operands separately. */
 297     if( (/*signed*/int) save < 0 )
 298         {
 299         /*
 300          * Difference of the two operands.  There can be no overflow.  A
 301          * borrow can occur out of the hidden bit and force a post
 302          * normalization phase.
 303          */
 304         Dbl_subtract_withextension(leftp1,leftp2,/*minus*/rightp1,rightp2,
 305         /*with*/extent,/*into*/resultp1,resultp2);
 306         if(Dbl_iszero_hidden(resultp1))
 307             {
 308             /* Handle normalization */
 309             /* A straightforward algorithm would now shift the result
 310              * and extension left until the hidden bit becomes one.  Not
 311              * all of the extension bits need participate in the shift.
 312              * Only the two most significant bits (round and guard) are
 313              * needed.  If only a single shift is needed then the guard
 314              * bit becomes a significant low order bit and the extension
 315              * must participate in the rounding.  If more than a single
 316              * shift is needed, then all bits to the right of the guard
 317              * bit are zeros, and the guard bit may or may not be zero. */
 318             sign_save = Dbl_signextendedsign(resultp1);
 319             Dbl_leftshiftby1_withextent(resultp1,resultp2,extent,resultp1,resultp2);
 320
 321             /* Need to check for a zero result.  The sign and exponent
 322              * fields have already been zeroed.  The more efficient test
 323              * of the full object can be used.
 324              */
 325             if(Dbl_iszero(resultp1,resultp2))
 326                 /* Must have been "x-x" or "x+(-x)". */
 327                 {
 328                 if(Is_rounding_mode(ROUNDMINUS)) Dbl_setone_sign(resultp1);
 329                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 330                 return(NOEXCEPTION);
 331                 }
 332             result_exponent--;
 333             /* Look to see if normalization is finished. */
 334             if(Dbl_isone_hidden(resultp1))
 335                 {
 336                 if(result_exponent==0)
 337                     {
 338                     /* Denormalized, exponent should be zero.  Left operand *
 339                      * was normalized, so extent (guard, round) was zero    */
 340                     goto underflow;
 341                     }
 342                 else
 343                     {
 344                     /* No further normalization is needed. */
 345                     Dbl_set_sign(resultp1,/*using*/sign_save);
 346                     Ext_leftshiftby1(extent);
 347                     goto round;
 348                     }
 349                 }
 350
 351             /* Check for denormalized, exponent should be zero.  Left    *
 352              * operand was normalized, so extent (guard, round) was zero */
                 /* The assignment inside the condition also caches the
                  * trap-enable state in underflowtrap for the shift steps
                  * below. */
 353             if(!(underflowtrap = Is_underflowtrap_enabled()) &&
 354                result_exponent==0) goto underflow;
 355
 356             /* Shift extension to complete one bit of normalization and
 357              * update exponent. */
 358             Ext_leftshiftby1(extent);
 359
 360             /* Discover first one bit to determine shift amount.  Use a
 361              * modified binary search.  We have already shifted the result
 362              * one position right and still not found a one so the remainder
 363              * of the extension must be zero and simplifies rounding. */
 364             /* Scan bytes */
                 /* With the underflow trap enabled the shifting continues
                  * even after result_exponent drops to zero or below; the
                  * exponent is dealt with at "underflow" afterwards. */
 365             while(Dbl_iszero_hiddenhigh7mantissa(resultp1))
 366                 {
 367                 Dbl_leftshiftby8(resultp1,resultp2);
 368                 if((result_exponent -= 8) <= 0  && !underflowtrap)
 369                     goto underflow;
 370                 }
 371             /* Now narrow it down to the nibble */
 372             if(Dbl_iszero_hiddenhigh3mantissa(resultp1))
 373                 {
 374                 /* The lower nibble contains the normalizing one */
 375                 Dbl_leftshiftby4(resultp1,resultp2);
 376                 if((result_exponent -= 4) <= 0 && !underflowtrap)
 377                     goto underflow;
 378                 }
 379             /* Select case where first bit is set (already normalized)
 380              * otherwise select the proper shift. */
 381             if((jumpsize = Dbl_hiddenhigh3mantissa(resultp1)) > 7)
 382                 {
 383                 /* Already normalized */
 384                 if(result_exponent <= 0) goto underflow;
 385                 Dbl_set_sign(resultp1,/*using*/sign_save);
 386                 Dbl_set_exponent(resultp1,/*using*/result_exponent);
 387                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 388                 return(NOEXCEPTION);
 389                 }
                 /* NOTE(review): presumably this installs the saved sign
                  * ahead of the final shift; the "Sign bit is already set"
                  * remark on the return below relies on it -- confirm
                  * against dbl_float.h. */
 390             Dbl_sethigh4bits(resultp1,/*using*/sign_save);
                 /* jumpsize is 1..7 here: values above 7 returned above,
                  * and there is no case for 0. */
 391             switch(jumpsize)
 392                 {
 393                 case 1:
 394                     {
 395                     Dbl_leftshiftby3(resultp1,resultp2);
 396                     result_exponent -= 3;
 397                     break;
 398                     }
 399                 case 2:
 400                 case 3:
 401                     {
 402                     Dbl_leftshiftby2(resultp1,resultp2);
 403                     result_exponent -= 2;
 404                     break;
 405                     }
 406                 case 4:
 407                 case 5:
 408                 case 6:
 409                 case 7:
 410                     {
 411                     Dbl_leftshiftby1(resultp1,resultp2);
 412                     result_exponent -= 1;
 413                     break;
 414                     }
 415                 }
 416             if(result_exponent > 0)
 417                 {
 418                 Dbl_set_exponent(resultp1,/*using*/result_exponent);
 419                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 420                 return(NOEXCEPTION);    /* Sign bit is already set */
 421                 }
 422             /* Fixup potential underflows */
                 /* Reached by goto from the normalization steps above, or
                  * by falling through when result_exponent <= 0. */
 423           underflow:
 424             if(Is_underflowtrap_enabled())
 425                 {
 426                 Dbl_set_sign(resultp1,sign_save);
 427                 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
 428                 Dbl_copytoptr(resultp1,resultp2,dstptr);
 429                 /* inexact = false */
 430                 return(UNDERFLOWEXCEPTION);
 431                 }
 432             /*
 433              * Since we cannot get an inexact denormalized result,
 434              * we can now return.
 435              */
 436             Dbl_fix_overshift(resultp1,resultp2,(1-result_exponent),extent);
 437             Dbl_clear_signexponent(resultp1);
 438             Dbl_set_sign(resultp1,sign_save);
 439             Dbl_copytoptr(resultp1,resultp2,dstptr);
 440             return(NOEXCEPTION);
 441             } /* end if(hidden...)... */
 442         /* Fall through and round */
 443         } /* end if(save < 0)... */
 444     else
 445         {
 446         /* Add magnitudes */
 447         Dbl_addition(leftp1,leftp2,rightp1,rightp2,/*to*/resultp1,resultp2);
 448         if(Dbl_isone_hiddenoverflow(resultp1))
 449             {
 450             /* Prenormalization required. */
 451             Dbl_rightshiftby1_withextent(resultp2,extent,extent);
 452             Dbl_arithrightshiftby1(resultp1,resultp2);
 453             result_exponent++;
 454             } /* end if hiddenoverflow... */
 455         } /* end else ...add magnitudes... */
 456
 457     /* Round the result.  If the extension is all zeros,then the result is
 458      * exact.  Otherwise round in the correct direction.  No underflow is
 459      * possible. If a postnormalization is necessary, then the mantissa is
 460      * all zeros so no shift is needed. */
 461   round:
 462     if(Ext_isnotzero(extent))
 463         {
 464         inexact = true;
 465         switch(Rounding_mode())
 466             {
 467             case ROUNDNEAREST: /* The default. */
 468             if(Ext_isone_sign(extent))
 469                 {
 470                 /* at least 1/2 ulp */
 471                 if(Ext_isnotzero_lower(extent)  ||
 472                   Dbl_isone_lowmantissap2(resultp2))
 473                     {
 474                     /* either exactly half way and odd or more than 1/2ulp */
 475                     Dbl_increment(resultp1,resultp2);
 476                     }
 477                 }
 478             break;
 479
 480             case ROUNDPLUS:
 481             if(Dbl_iszero_sign(resultp1))
 482                 {
 483                 /* Round up positive results */
 484                 Dbl_increment(resultp1,resultp2);
 485                 }
 486             break;
 487
 488             case ROUNDMINUS:
 489             if(Dbl_isone_sign(resultp1))
 490                 {
 491                 /* Round down negative results */
 492                 Dbl_increment(resultp1,resultp2);
 493                 }
 494             /* FALLTHROUGH */ /* no break; the ROUNDZERO case is empty */
 495             case ROUNDZERO:;
 496             /* truncate is simple */
 497             } /* end switch... */
             /* The increment may have carried out of the hidden bit; only
              * the exponent is adjusted (see the comment above "round"). */
 498         if(Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
 499         }
 500     if(result_exponent == DBL_INFINITY_EXPONENT)
 501         {
 502         /* Overflow */
 503         if(Is_overflowtrap_enabled())
 504             {
 505             Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
 506             Dbl_copytoptr(resultp1,resultp2,dstptr);
                 /* An inexact result is reported together with the overflow
                  * when the inexact trap is enabled; otherwise only the
                  * inexact flag is set. */
 507             if (inexact) {
 508                 if (Is_inexacttrap_enabled())
 509                         return(OVERFLOWEXCEPTION | INEXACTEXCEPTION);
 510                 else
 511                         Set_inexactflag();
 512             }
 513             return(OVERFLOWEXCEPTION);
 514             }
 515         else
 516             {
                 /* An untrapped overflow is always treated as inexact by
                  * the common exit below. */
 517             inexact = true;
 518             Set_overflowflag();
 519             Dbl_setoverflow(resultp1,resultp2);
 520             }
 521         }
 522     else Dbl_set_exponent(resultp1,result_exponent);
         /* Common exit: store the result, then report inexact by trap
          * (return value) or by flag. */
 523     Dbl_copytoptr(resultp1,resultp2,dstptr);
 524     if(inexact) {
 525         if(Is_inexacttrap_enabled())
 526             return(INEXACTEXCEPTION);
 527         else
 528             Set_inexactflag();
 529     }
 530     return(NOEXCEPTION);
 531     }