lib/libm/src/s_fma.c

   1 /*      $NetBSD: s_fma.c,v 1.6 2013/02/14 09:24:50 matt Exp $   */
   2
   3 /*-
   4  * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <sys/cdefs.h>
  30 #if 0
  31 __FBSDID("$FreeBSD: src/lib/msun/src/s_fma.c,v 1.8 2011/10/21 06:30:43 das Exp $");
  32 #else
  33 __RCSID("$NetBSD: s_fma.c,v 1.6 2013/02/14 09:24:50 matt Exp $");
  34 #endif
  35
  36 #include <machine/ieee.h>
  37 #include <fenv.h>
  38 #include <float.h>
  39 #include <math.h>
  40
  41 #include "math_private.h"
  42
  43 #ifndef __HAVE_LONG_DOUBLE
  44 __strong_alias(fmal, fma)
  45 #endif
  46
  47 /*
  48  * A struct dd represents a floating-point number with twice the precision
  49  * of a double.  We maintain the invariant that "hi" stores the 53 high-order
  50  * bits of the result.
  51  */
  52 struct dd {
  53         double hi;
  54         double lo;
  55 };
  56
  57 /*
  58  * Compute a+b exactly, returning the exact result in a struct dd.  We assume
  59  * that both a and b are finite, but make no assumptions about their relative
  60  * magnitudes.
  61  */
  62 static inline struct dd
  63 dd_add(double a, double b)
  64 {
  65         struct dd ret;
  66         double s;
  67
  68         ret.hi = a + b;
  69         s = ret.hi - a;
  70         ret.lo = (a - (ret.hi - s)) + (b - s);
  71         return (ret);
  72 }
  73
  74 /*
  75  * Compute a+b, with a small tweak:  The least significant bit of the
  76  * result is adjusted into a sticky bit summarizing all the bits that
  77  * were lost to rounding.  This adjustment negates the effects of double
  78  * rounding when the result is added to another number with a higher
  79  * exponent.  For an explanation of round and sticky bits, see any reference
  80  * on FPU design, e.g.,
  81  *
  82  *     J. Coonen.  An Implementation Guide to a Proposed Standard for
  83  *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
  84  */
  85 static inline double
  86 add_adjusted(double a, double b)
  87 {
  88         struct dd sum;
  89         uint64_t hibits, lobits;
  90
  91         sum = dd_add(a, b);
  92         if (sum.lo != 0) {
  93                 EXTRACT_WORD64(hibits, sum.hi);
  94                 if ((hibits & 1) == 0) {
  95                         /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
  96                         EXTRACT_WORD64(lobits, sum.lo);
  97                         hibits += 1 - ((hibits ^ lobits) >> 62);
  98                         INSERT_WORD64(sum.hi, hibits);
  99                 }
 100         }
 101         return (sum.hi);
 102 }
 103
 104 /*
 105  * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
 106  * that the result will be subnormal, and care is taken to ensure that
 107  * double rounding does not occur.
 108  */
 109 static inline double
 110 add_and_denormalize(double a, double b, int scale)
 111 {
 112         struct dd sum;
 113         uint64_t hibits, lobits;
 114         int bits_lost;
 115
 116         sum = dd_add(a, b);
 117
 118         /*
 119          * If we are losing at least two bits of accuracy to denormalization,
 120          * then the first lost bit becomes a round bit, and we adjust the
 121          * lowest bit of sum.hi to make it a sticky bit summarizing all the
 122          * bits in sum.lo. With the sticky bit adjusted, the hardware will
 123          * break any ties in the correct direction.
 124          *
 125          * If we are losing only one bit to denormalization, however, we must
 126          * break the ties manually.
 127          */
 128         if (sum.lo != 0) {
 129                 EXTRACT_WORD64(hibits, sum.hi);
 130                 bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
 131                 if ((bits_lost != 1) ^ (int)(hibits & 1)) {
 132                         /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
 133                         EXTRACT_WORD64(lobits, sum.lo);
 134                         hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
 135                         INSERT_WORD64(sum.hi, hibits);
 136                 }
 137         }
 138         return (ldexp(sum.hi, scale));
 139 }
 140
 141 /*
 142  * Compute a*b exactly, returning the exact result in a struct dd.  We assume
 143  * that both a and b are normalized, so no underflow or overflow will occur.
 144  * The current rounding mode must be round-to-nearest.
 145  */
 146 static inline struct dd
 147 dd_mul(double a, double b)
 148 {
 149         static const double split = 0x1p27 + 1.0;
 150         struct dd ret;
 151         double ha, hb, la, lb, p, q;
 152
 153         p = a * split;
 154         ha = a - p;
 155         ha += p;
 156         la = a - ha;
 157
 158         p = b * split;
 159         hb = b - p;
 160         hb += p;
 161         lb = b - hb;
 162
 163         p = ha * hb;
 164         q = ha * lb + la * hb;
 165
 166         ret.hi = p + q;
 167         ret.lo = p - ret.hi + q + la * lb;
 168         return (ret);
 169 }
 170
 171 /*
 172  * Fused multiply-add: Compute x * y + z with a single rounding error.
 173  *
 174  * We use scaling to avoid overflow/underflow, along with the
 175  * canonical precision-doubling technique adapted from:
 176  *
 177  *      Dekker, T.  A Floating-Point Technique for Extending the
 178  *      Available Precision.  Numer. Math. 18, 224-242 (1971).
 179  *
 180  * This algorithm is sensitive to the rounding precision.  FPUs such
 181  * as the i387 must be set in double-precision mode if variables are
 182  * to be stored in FP registers in order to avoid incorrect results.
 183  * This is the default on FreeBSD, but not on many other systems.
 184  *
 185  * Hardware instructions should be used on architectures that support it,
 186  * since this implementation will likely be several times slower.
 187  */
 188 double
 189 fma(double x, double y, double z)
 190 {
 191         double xs, ys, zs, adj;
 192         struct dd xy, r;
 193         int oround;
 194         int ex, ey, ez;
 195         int spread;
 196
 197         /*
 198          * Handle special cases. The order of operations and the particular
 199          * return values here are crucial in handling special cases involving
 200          * infinities, NaNs, overflows, and signed zeroes correctly.
 201          */
 202         if (x == 0.0 || y == 0.0)
 203                 return (x * y + z);
 204         if (z == 0.0)
 205                 return (x * y);
 206         if (!isfinite(x) || !isfinite(y))
 207                 return (x * y + z);
 208         if (!isfinite(z))
 209                 return (z);
 210
 211         xs = frexp(x, &ex);
 212         ys = frexp(y, &ey);
 213         zs = frexp(z, &ez);
 214         oround = fegetround();
 215         spread = ex + ey - ez;
 216
 217         /*
 218          * If x * y and z are many orders of magnitude apart, the scaling
 219          * will overflow, so we handle these cases specially.  Rounding
 220          * modes other than FE_TONEAREST are painful.
 221          */
 222         if (spread < -DBL_MANT_DIG) {
 223                 feraiseexcept(FE_INEXACT);
 224                 if (!isnormal(z))
 225                         feraiseexcept(FE_UNDERFLOW);
 226                 switch (oround) {
 227                 case FE_TONEAREST:
 228                         return (z);
 229                 case FE_TOWARDZERO:
 230                         if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0))
 231                                 return (z);
 232                         else
 233                                 return (nextafter(z, 0));
 234                 case FE_DOWNWARD:
 235                         if ((x > 0.0) ^ (y < 0.0))
 236                                 return (z);
 237                         else
 238                                 return (nextafter(z, -INFINITY));
 239                 default:        /* FE_UPWARD */
 240                         if ((x > 0.0) ^ (y < 0.0))
 241                                 return (nextafter(z, INFINITY));
 242                         else
 243                                 return (z);
 244                 }
 245         }
 246         if (spread <= DBL_MANT_DIG * 2)
 247                 zs = ldexp(zs, -spread);
 248         else
 249                 zs = copysign(DBL_MIN, zs);
 250
 251         fesetround(FE_TONEAREST);
 252
 253         /*
 254          * Basic approach for round-to-nearest:
 255          *
 256          *     (xy.hi, xy.lo) = x * y           (exact)
 257          *     (r.hi, r.lo)   = xy.hi + z       (exact)
 258          *     adj = xy.lo + r.lo               (inexact; low bit is sticky)
 259          *     result = r.hi + adj              (correctly rounded)
 260          */
 261         xy = dd_mul(xs, ys);
 262         r = dd_add(xy.hi, zs);
 263
 264         spread = ex + ey;
 265
 266         if (r.hi == 0.0) {
 267                 /*
 268                  * When the addends cancel to 0, ensure that the result has
 269                  * the correct sign.
 270                  */
 271                 fesetround(oround);
 272                 {
 273                 volatile double vzs = zs; /* XXX gcc CSE bug workaround */
 274                 return (xy.hi + vzs + ldexp(xy.lo, spread));
 275                 }
 276         }
 277
 278         if (oround != FE_TONEAREST) {
 279                 /*
 280                  * There is no need to worry about double rounding in directed
 281                  * rounding modes.
 282                  */
 283                 fesetround(oround);
 284                 adj = r.lo + xy.lo;
 285                 return (ldexp(r.hi + adj, spread));
 286         }
 287
 288         adj = add_adjusted(r.lo, xy.lo);
 289         if (spread + ilogb(r.hi) > -1023)
 290                 return (ldexp(r.hi + adj, spread));
 291         else
 292                 return (add_and_denormalize(r.hi, adj, spread));
 293 }